
Commit b009b12

[V0 deprecation] Deprecate use_v1 parameter
Signed-off-by: wangxiyuan <[email protected]>
1 parent 0976711 commit b009b12

File tree

8 files changed, +28 -23 lines changed


tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,

vllm/attention/selector.py

Lines changed: 27 additions & 11 deletions
@@ -187,17 +187,33 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform

-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    try:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    except TypeError:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in the future. Please remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name}"

vllm/platforms/cpu.py

Lines changed: 0 additions & 3 deletions
@@ -131,7 +131,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -145,8 +144,6 @@ def get_attn_backend_cls(
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on CPU.")
         logger.info("Using Torch SDPA backend.")
-        if not use_v1:
-            raise ValueError("CPU backend only supports V1.")
         return "vllm.v1.attention.backends.cpu_attn.TorchSDPABackend"

     @classmethod

vllm/platforms/cuda.py

Lines changed: 0 additions & 1 deletion
@@ -253,7 +253,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,

vllm/platforms/interface.py

Lines changed: 0 additions & 1 deletion
@@ -187,7 +187,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,

vllm/platforms/rocm.py

Lines changed: 0 additions & 1 deletion
@@ -222,7 +222,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,

vllm/platforms/tpu.py

Lines changed: 0 additions & 3 deletions
@@ -59,7 +59,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink,
         use_sparse,
@@ -71,8 +70,6 @@ def get_attn_backend_cls(
         if selected_backend != _Backend.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)

-        if not use_v1:
-            raise ValueError("TPU backend only supports V1.")
         logger.info("Using Pallas V1 backend.")
         return "vllm.v1.attention.backends.pallas.PallasAttentionBackend"

vllm/platforms/xpu.py

Lines changed: 1 addition & 2 deletions
@@ -49,7 +49,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
@@ -77,7 +76,7 @@ def get_attn_backend_cls(
         elif selected_backend:
             raise ValueError(
                 f"Invalid attention backend for {cls.device_name}, "
-                f"with use_v1: {use_v1} use_mla: {use_mla}"
+                f"with use_mla: {use_mla}"
             )

         logger.info("Using Flash Attention backend.")
