8 files changed: +28 −23 lines

tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform
@@ -27,7 +27,6 @@ def get_attn_backend_cls(
         dtype,
         kv_cache_dtype,
         block_size,
-        use_v1,
         use_mla,
         has_sink,
         use_sparse,
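For context, the change above is the plugin-side part of the migration: out-of-tree platforms simply drop use_v1 from their get_attn_backend_cls override. Below is a hedged sketch of what such an updated override might look like; the class name, return string, and parameter comments are illustrative assumptions, not code from this PR.

# Illustrative sketch only: a hypothetical out-of-tree platform after removing
# use_v1, mirroring the parameter order visible in this diff.
from __future__ import annotations

import torch


class MyDummyPlatform:  # a real plugin would subclass vLLM's Platform interface
    device_name = "dummy"

    @classmethod
    def get_attn_backend_cls(
        cls,
        selected_backend,          # backend explicitly selected by the user, if any
        head_size: int,
        dtype: torch.dtype,
        kv_cache_dtype: str | None,
        block_size: int,
        use_mla: bool,             # use_v1 is gone; V1 is the only engine now
        has_sink: bool,
        use_sparse: bool,
    ) -> str:
        # Return the fully qualified path of the plugin's attention backend class.
        return "my_plugin.attention.MyAttentionBackend"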
@@ -187,17 +187,33 @@ def _cached_get_attn_backend(
     # get device-specific attn_backend
     from vllm.platforms import current_platform
 
-    attention_cls = current_platform.get_attn_backend_cls(
-        selected_backend,
-        head_size,
-        dtype,
-        kv_cache_dtype,
-        block_size,
-        True,
-        use_mla,
-        has_sink,
-        use_sparse,
-    )
+    try:
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
+    except TypeError:
+        logger.warning_once(
+            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
+            "be removed in the future. Please remove it from your plugin code."
+        )
+        attention_cls = current_platform.get_attn_backend_cls(
+            selected_backend,
+            head_size,
+            dtype,
+            kv_cache_dtype,
+            block_size,
+            True,
+            use_mla,
+            has_sink,
+            use_sparse,
+        )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name} "
@@ -131,7 +131,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -145,8 +144,6 @@ def get_attn_backend_cls(
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on CPU.")
         logger.info("Using Torch SDPA backend.")
-        if not use_v1:
-            raise ValueError("CPU backend only supports V1.")
         return "vllm.v1.attention.backends.cpu_attn.TorchSDPABackend"
 
     @classmethod
@@ -253,7 +253,6 @@ def get_attn_backend_cls(
             dtype,
             kv_cache_dtype,
             block_size,
-            use_v1,
             use_mla,
             has_sink,
             use_sparse,
@@ -187,7 +187,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse: bool,
@@ -222,7 +222,6 @@ def get_attn_backend_cls(
             dtype,
             kv_cache_dtype,
             block_size,
-            use_v1,
             use_mla,
             has_sink,
             use_sparse,
@@ -59,7 +59,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink,
         use_sparse,
@@ -71,8 +70,6 @@ def get_attn_backend_cls(
         if selected_backend != _Backend.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)
 
-        if not use_v1:
-            raise ValueError("TPU backend only supports V1.")
         logger.info("Using Pallas V1 backend.")
         return "vllm.v1.attention.backends.pallas.PallasAttentionBackend"
 
@@ -49,7 +49,6 @@ def get_attn_backend_cls(
         dtype: torch.dtype,
         kv_cache_dtype: str | None,
         block_size: int,
-        use_v1: bool,
         use_mla: bool,
         has_sink: bool,
         use_sparse,
@@ -77,7 +76,7 @@ def get_attn_backend_cls(
         elif selected_backend:
             raise ValueError(
                 f"Invalid attention backend for {cls.device_name}, "
-                f"with use_v1: {use_v1} use_mla: {use_mla} "
+                f"with use_mla: {use_mla} "
             )
 
         logger.info("Using Flash Attention backend.")
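Not covered by this PR, but a natural follow-up for plugin authors who must support both older vLLM releases (which pass use_v1 positionally after block_size) and the new calling convention shown above: one possible shim is to absorb the trailing arguments and branch on their count, so the core's first call succeeds and the deprecation fallback never fires. This is an assumption-laden sketch, not a recommendation from the PR; the class and return string are illustrative.

# Hypothetical dual-compatibility override for a plugin targeting both the old
# and new call signatures; purely illustrative.
class FlexiblePlatform:
    @classmethod
    def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                             kv_cache_dtype, block_size, *rest):
        if len(rest) == 4:
            # Older vLLM passes (use_v1, use_mla, has_sink, use_sparse).
            _use_v1, use_mla, has_sink, use_sparse = rest
        else:
            # This PR's convention passes (use_mla, has_sink, use_sparse).
            use_mla, has_sink, use_sparse = rest
        return "my_plugin.attention.MyAttentionBackend"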