|
1 | 1 | { |
2 | | - "cmd": "/home/changsu/miniconda3/bin/genai-bench --api_backend openai --api_base http://localhost:8082 --api_key your-openai-api-key --api_model_name vllm-model --model_tokenizer /mnt/data/models/Meta-Llama-3.1-70B-Instruct --task chat --run_time 1 --server_engine vLLM --server_gpu_type H100 --server_version v0.6.0 --server_gpu_count 4 --model Meta-Llama-3.1-70B-Instruct --num_concurrency (1, 2, 4, 8, 16, 32, 64, 128, 256) --traffic_scenario N(480,240)/(300,150) --traffic_scenario D(100,100) --traffic_scenario D(100,1000) --traffic_scenario D(2000,200) --traffic_scenario D(7800,200)", |
| 2 | + "cmd": "/home/changsu/miniconda3/bin/genai-bench --api_backend openai --api_base http://localhost:8082 --api_key your-openai-api-key --api_model_name meta-llama/Meta-Llama-3-70B-Instruct --model_tokenizer /mnt/data/models/Meta-Llama-3.1-70B-Instruct --task chat --run_time 1 --server_engine SGLang --server_gpu_type H100 --server_version v0.6.0 --server_gpu_count 4 --model Meta-Llama-3.1-70B-Instruct --num_concurrency (1, 2, 4, 8, 16, 32, 64, 128, 256) --traffic_scenario N(480,240)/(300,150) --traffic_scenario D(100,100) --traffic_scenario D(100,1000) --traffic_scenario D(2000,200) --traffic_scenario D(7800,200)", |
3 | 3 | "benchmark_version": "", |
4 | 4 | "api_backend": "openai", |
5 | 5 | "auth_config": { |
6 | 6 | "api_base": "http://localhost:8084", |
7 | 7 | "api_key": "********_key" |
8 | 8 | }, |
9 | | - "api_model_name": "vllm-model", |
| 9 | + "api_model_name": "meta-llama/Meta-Llama-3-70B-Instruct", |
10 | 10 | "server_model_tokenizer": "/mnt/data/models/Meta-Llama-3.1-70B-Instruct", |
11 | 11 | "model": "Meta-Llama-3.1-70B-Instruct", |
12 | 12 | "task": "text-to-text", |
|
29 | 29 | "D(7800,200)" |
30 | 30 | ], |
31 | 31 | "additional_request_params": {}, |
32 | | - "server_engine": "vLLM", |
| 32 | + "server_engine": "SGLang", |
33 | 33 | "server_version": "v0.6.0", |
34 | 34 | "server_gpu_type": "H100", |
35 | 35 | "server_gpu_count": "4", |
36 | 36 | "max_time_per_run_s": 60, |
37 | 37 | "max_requests_per_run": 300, |
38 | | - "experiment_folder_name": "/home/changsu/openai_vLLM_v0.6.0_chat_vllm-model_tokenizer__mnt_data_models_Meta-Llama-3.1-70B-Instruct_20240906_165433", |
| 38 | + "experiment_folder_name": "/home/changsu/openai_SGLang_v0.6.0_chat_sglang-model_tokenizer__mnt_data_models_Meta-Llama-3.1-70B-Instruct_20240906_165433", |
39 | 39 | "dataset_path": null, |
40 | 40 | "dataset_prompt_column_index": 0, |
41 | 41 | "character_token_ratio": 4.059085841694538 |
|
0 commit comments