7 files changed (+31 −22)

tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform

```diff
@@ -27,7 +27,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,
```
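For out-of-tree platforms, the migration is simply dropping the parameter. A minimal sketch of an updated plugin hook, assuming the same positional argument order as the dummy platform above (the class name and returned path are illustrative, not real vLLM identifiers):

```python
# Hypothetical third-party platform plugin; only the parameter order mirrors
# the diff above -- the class name and return path are made up.
class MyDummyPlatform:
    @classmethod
    def get_attn_backend_cls(
        cls,
        selected_backend,
        head_size,
        dtype,
        kv_cache_dtype,
        block_size,
        use_mla,  # use_v1 no longer precedes use_mla
        has_sink,
        use_sparse,
    ) -> str:
        # Return the import path of the attention backend this platform provides.
        return "my_plugin.attention.MyAttentionBackend"
```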
```diff
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import inspect
 import os
 from collections.abc import Generator
 from contextlib import contextmanager
@@ -141,17 +142,35 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform
 
-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    sig = inspect.signature(current_platform.get_attn_backend_cls)
+    if "use_v1" in sig.parameters:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
+            "remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,  # use_v1
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    else:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name} "
```
```diff
@@ -131,7 +131,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -144,8 +143,6 @@ def get_attn_backend_cls(
             raise NotImplementedError("MLA is not supported on CPU.")
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on CPU.")
-        if not use_v1:
-            raise ValueError("CPU backend only supports V1.")
         return AttentionBackendEnum.CPU_ATTN.get_path()
 
     @classmethod
```
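With the engine-version check gone, the CPU hook validates only genuine hardware capabilities. A distilled, standalone sketch of the remaining guard pattern (the function name and return string are placeholders; the real method returns `AttentionBackendEnum.CPU_ATTN.get_path()`):

```python
# Hypothetical distillation of the CPU hunk above, runnable on its own.
def select_cpu_backend(use_mla: bool, use_sparse: bool) -> str:
    if use_mla:
        raise NotImplementedError("MLA is not supported on CPU.")
    if use_sparse:
        raise NotImplementedError("Sparse Attention is not supported on CPU.")
    # No "V1 only" check anymore: V1 is the sole engine, so nothing
    # engine-related is left to validate.
    return "<AttentionBackendEnum.CPU_ATTN.get_path()>"


print(select_cpu_backend(use_mla=False, use_sparse=False))
```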
```diff
@@ -215,7 +215,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: "CacheDType | None",
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
```
```diff
@@ -213,7 +213,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,
```
```diff
@@ -58,7 +58,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink,
         use_sparse,
@@ -70,8 +69,6 @@ def get_attn_backend_cls(
         if selected_backend != AttentionBackendEnum.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)
 
-        if not use_v1:
-            raise ValueError("TPU backend only supports V1.")
         logger.info("Using Pallas V1 backend.")
         return AttentionBackendEnum.PALLAS.get_path()
 
```
```diff
@@ -48,7 +48,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
@@ -77,7 +76,7 @@ def get_attn_backend_cls(
         elif selected_backend:
             raise ValueError(
                 f"Invalid attention backend for {cls.device_name}, "
-                f"with use_v1: {use_v1} use_mla: {use_mla}"
+                f"with use_mla: {use_mla}"
             )
 
         logger.info("Using Flash Attention backend.")
```