We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 95d7e20 commit 03db7c4 — Copy full SHA for 03db7c4
run-vllm.py
@@ -33,8 +33,8 @@ def __call__(
33
m = params["_max_tokens"]
34
kwargs["max_num_batched_tokens"] = m
35
kwargs["max_model_len"] = min(m, model_max_tokens or m, model_seq_length or m)
36
- if kwargs["tensor_parallel_size"] > 0:
37
- tensor_parallel_size = kwargs["tensor_parallel_size"]
+ if params["tensor_parallel_size"] > 0:
+ tensor_parallel_size = params["tensor_parallel_size"]
38
else:
39
tensor_parallel_size = math.gcd(
40
torch.cuda.device_count(),
0 commit comments