Commit 10138c9

[V0 deprecation] Deprecate use_v1 parameter (vllm-project#28112)
Signed-off-by: wangxiyuan <[email protected]>
1 parent a9d18b5 commit 10138c9

File tree: 8 files changed, +31 additions, −35 deletions

tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py

Lines changed: 0 additions & 1 deletion

@@ -27,7 +27,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,
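
For out-of-tree platform plugins, the takeaway is the same as in this dummy test platform: get_attn_backend_cls no longer receives use_v1. Below is a minimal sketch of an updated plugin platform; the class name and the returned import path are invented for illustration, and the parameter order follows the call in vllm/attention/selector.py shown further down.

# Sketch only: DummyPlatform and the returned backend path are hypothetical,
# not part of this commit. The parameter list mirrors the updated interface
# (use_v1 removed).
from vllm.platforms.interface import Platform


class DummyPlatform(Platform):
    @classmethod
    def get_attn_backend_cls(
        cls,
        selected_backend,
        head_size,
        dtype,
        kv_cache_dtype,
        block_size,
        use_mla,
        has_sink,
        use_sparse,
    ) -> str:
        # Return the import path of the attention backend class to use.
        return "my_plugin.attention.DummyAttentionBackend"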

vllm/attention/selector.py

Lines changed: 30 additions & 11 deletions

@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import inspect
 import os
 from collections.abc import Generator
 from contextlib import contextmanager
@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform
 
-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    sig = inspect.signature(current_platform.get_attn_backend_cls)
+    if "use_v1" in sig.parameters:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
+            "remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,  # use_v1
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    else:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name}"

vllm/platforms/cpu.py

Lines changed: 0 additions & 3 deletions

@@ -131,7 +131,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -144,8 +143,6 @@ def get_attn_backend_cls(
             raise NotImplementedError("MLA is not supported on CPU.")
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on CPU.")
-        if not use_v1:
-            raise ValueError("CPU backend only supports V1.")
         return AttentionBackendEnum.CPU_ATTN.get_path()
 
     @classmethod

vllm/platforms/cuda.py

Lines changed: 0 additions & 7 deletions

@@ -336,17 +336,10 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: "CacheDType | None",
         block_size: int | None,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
     ) -> str:
-        if not use_v1:
-            raise RuntimeError(
-                "V0 attention backends have been removed. Set VLLM_USE_V1=1 "
-                "to select a supported backend."
-            )
-
         device_capability = cls.get_device_capability()
         assert device_capability is not None
 
vllm/platforms/interface.py

Lines changed: 0 additions & 1 deletion

@@ -215,7 +215,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: "CacheDType | None",
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,

vllm/platforms/rocm.py

Lines changed: 0 additions & 7 deletions

@@ -213,7 +213,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,
@@ -224,12 +223,6 @@ def get_attn_backend_cls(
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on ROCm.")
 
-        if not use_v1:
-            raise RuntimeError(
-                "V0 attention backends have been removed. Set VLLM_USE_V1=1 "
-                "to select a supported backend."
-            )
-
         if use_mla:
             if selected_backend is None:
                 selected_backend = (

vllm/platforms/tpu.py

Lines changed: 0 additions & 3 deletions

@@ -58,7 +58,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink,
         use_sparse,
@@ -70,8 +69,6 @@ def get_attn_backend_cls(
         if selected_backend != AttentionBackendEnum.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)
 
-        if not use_v1:
-            raise ValueError("TPU backend only supports V1.")
         logger.info("Using Pallas V1 backend.")
         return AttentionBackendEnum.PALLAS.get_path()
 

vllm/platforms/xpu.py

Lines changed: 1 addition & 2 deletions

@@ -48,7 +48,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
@@ -76,7 +75,7 @@ def get_attn_backend_cls(
         elif selected_backend:
             raise ValueError(
                 f"Invalid attention backend for {cls.device_name}, "
-                f"with use_v1: {use_v1} use_mla: {use_mla}"
+                f"with use_mla: {use_mla}"
             )
 
         logger.info("Using Flash Attention backend.")
