Commit 5b6f280

[V0 deprecation] Deprecate use_v1 parameter
Signed-off-by: wangxiyuan <[email protected]>
1 parent 91864b7 commit 5b6f280

File tree: 7 files changed (+31, -22 lines)

tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py
Lines changed: 0 additions & 1 deletion

@@ -27,7 +27,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,

vllm/attention/selector.py
Lines changed: 30 additions & 11 deletions

@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import inspect
 import os
 from collections.abc import Generator
 from contextlib import contextmanager
@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform
 
-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    sig = inspect.signature(current_platform.get_attn_backend_cls)
+    if "use_v1" in sig.parameters:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
+            "remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,  # use_v1
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    else:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name}"

vllm/platforms/cpu.py
Lines changed: 0 additions & 3 deletions

@@ -131,7 +131,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -144,8 +143,6 @@ def get_attn_backend_cls(
             raise NotImplementedError("MLA is not supported on CPU.")
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on CPU.")
-        if not use_v1:
-            raise ValueError("CPU backend only supports V1.")
         return AttentionBackendEnum.CPU_ATTN.get_path()
 
     @classmethod

vllm/platforms/interface.py
Lines changed: 0 additions & 1 deletion

@@ -215,7 +215,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: "CacheDType | None",
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,

vllm/platforms/rocm.py
Lines changed: 0 additions & 1 deletion

@@ -213,7 +213,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,

vllm/platforms/tpu.py
Lines changed: 0 additions & 3 deletions

@@ -58,7 +58,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink,
         use_sparse,
@@ -70,8 +69,6 @@ def get_attn_backend_cls(
         if selected_backend != AttentionBackendEnum.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)
 
-        if not use_v1:
-            raise ValueError("TPU backend only supports V1.")
         logger.info("Using Pallas V1 backend.")
         return AttentionBackendEnum.PALLAS.get_path()
 

vllm/platforms/xpu.py
Lines changed: 1 addition & 2 deletions

@@ -48,7 +48,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
@@ -77,7 +76,7 @@ def get_attn_backend_cls(
         elif selected_backend:
             raise ValueError(
                 f"Invalid attention backend for {cls.device_name}, "
-                f"with use_v1: {use_v1} use_mla: {use_mla}"
+                f"with use_mla: {use_mla}"
             )
 
         logger.info("Using Flash Attention backend.")
