8 files changed, +31 -35 lines changed

tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform
@@ -27,7 +27,6 @@ def get_attn_backend_cls(
     dtype,
     kv_cache_dtype,
     block_size,
-    use_v1,
     use_mla,
     has_sink,
     use_sparse,
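The dummy-plugin hunk above only shows the tail of the signature; for an out-of-tree platform plugin the whole update is just dropping `use_v1` from the override. A minimal sketch of the post-change override, assuming a plugin that subclasses vLLM's `Platform` interface (the class name, the `vllm.platforms.interface` import path, and the returned backend path are illustrative assumptions, not part of this diff):

```python
# Hypothetical out-of-tree platform plugin, updated to the new signature.
from vllm.platforms.interface import Platform  # assumed base-class import path


class MyDummyPlatform(Platform):
    @classmethod
    def get_attn_backend_cls(
        cls,
        selected_backend,
        head_size,
        dtype,
        kv_cache_dtype,
        block_size,
        use_mla,     # use_v1 is gone; the remaining parameters keep their order
        has_sink,
        use_sparse,
    ) -> str:
        # Return the dotted path of the attention backend class to load.
        return "my_plugin.attention.DummyAttentionBackend"  # illustrative path
```

Plugins that keep the old signature continue to work for now: the selector detects the extra `use_v1` parameter and falls back to the old call shape, logging the deprecation warning shown in the next hunk.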

 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import inspect
 import os
 from collections.abc import Generator
 from contextlib import contextmanager

@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform

-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    sig = inspect.signature(current_platform.get_attn_backend_cls)
+    if "use_v1" in sig.parameters:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
+            "remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,  # use_v1
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    else:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name} "

@@ -131,7 +131,6 @@ def get_attn_backend_cls(
     dtype: torch.dtype,
     kv_cache_dtype: str | None,
     block_size: int,
-    use_v1: bool,
     use_mla: bool,
     has_sink: bool,
     use_sparse: bool,
@@ -144,8 +143,6 @@ def get_attn_backend_cls(
         raise NotImplementedError("MLA is not supported on CPU.")
     if use_sparse:
         raise NotImplementedError("Sparse Attention is not supported on CPU.")
-    if not use_v1:
-        raise ValueError("CPU backend only supports V1.")
     return AttentionBackendEnum.CPU_ATTN.get_path()

 @classmethod

@@ -336,17 +336,10 @@ def get_attn_backend_cls(
     dtype: torch.dtype,
     kv_cache_dtype: "CacheDType | None",
     block_size: int | None,
-    use_v1: bool,
     use_mla: bool,
     has_sink: bool,
     use_sparse: bool,
 ) -> str:
-    if not use_v1:
-        raise RuntimeError(
-            "V0 attention backends have been removed. Set VLLM_USE_V1=1 "
-            "to select a supported backend."
-        )
-
     device_capability = cls.get_device_capability()
     assert device_capability is not None


@@ -215,7 +215,6 @@ def get_attn_backend_cls(
     dtype: torch.dtype,
     kv_cache_dtype: "CacheDType | None",
     block_size: int,
-    use_v1: bool,
     use_mla: bool,
     has_sink: bool,
     use_sparse: bool,

@@ -213,7 +213,6 @@ def get_attn_backend_cls(
     dtype,
     kv_cache_dtype,
     block_size,
-    use_v1,
     use_mla,
     has_sink,
     use_sparse,
@@ -224,12 +223,6 @@ def get_attn_backend_cls(
     if use_sparse:
         raise NotImplementedError("Sparse Attention is not supported on ROCm.")

-    if not use_v1:
-        raise RuntimeError(
-            "V0 attention backends have been removed. Set VLLM_USE_V1=1 "
-            "to select a supported backend."
-        )
-
     if use_mla:
         if selected_backend is None:
             selected_backend = (

@@ -58,7 +58,6 @@ def get_attn_backend_cls(
     dtype: torch.dtype,
     kv_cache_dtype: str | None,
     block_size: int,
-    use_v1: bool,
     use_mla: bool,
     has_sink,
     use_sparse,
@@ -70,8 +69,6 @@ def get_attn_backend_cls(
     if selected_backend != AttentionBackendEnum.PALLAS:
         logger.info("Cannot use %s backend on TPU.", selected_backend)

-    if not use_v1:
-        raise ValueError("TPU backend only supports V1.")
     logger.info("Using Pallas V1 backend.")
     return AttentionBackendEnum.PALLAS.get_path()


@@ -48,7 +48,6 @@ def get_attn_backend_cls(
     dtype: torch.dtype,
     kv_cache_dtype: str | None,
     block_size: int,
-    use_v1: bool,
     use_mla: bool,
     has_sink: bool,
     use_sparse,
@@ -76,7 +75,7 @@ def get_attn_backend_cls(
     elif selected_backend:
         raise ValueError(
             f"Invalid attention backend for {cls.device_name}, "
-            f"with use_v1: {use_v1} use_mla: {use_mla}"
+            f"with use_mla: {use_mla}"
         )

     logger.info("Using Flash Attention backend.")
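A quick way for a plugin maintainer to see which branch their platform will take is to repeat the selector's check from an interactive session; the `current_platform` import is the same one used in the selector hunk above:

```python
import inspect

from vllm.platforms import current_platform  # same import as in the selector

# True: the deprecated call shape (use_v1 passed as True) is used and the
# deprecation warning is logged. False: the new eight-argument shape is used.
needs_migration = "use_v1" in inspect.signature(
    current_platform.get_attn_backend_cls
).parameters
print(needs_migration)
```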