
Commit ff02199

key4ng authored
misc: Fix doc naming and use flexible plot in cli (#65)
Co-authored-by: key4ng <[email protected]>
1 parent f8d66e4 commit ff02199

File tree

13 files changed, +89 -84 lines changed


.coveragerc

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ omit =
     genai_bench/cli/report.py
     genai_bench/analysis/excel_report.py
     genai_bench/analysis/plot_report.py
+    genai_bench/analysis/flexible_plot_report.py
+    genai_bench/analysis/plot_config.py
     genai_bench/ui/*
     genai_bench/logging.py
     tests/*

docs/user-guide/multi-cloud-auth-storage.md

Lines changed: 3 additions & 3 deletions
@@ -176,7 +176,7 @@ genai-bench benchmark \
   --max-requests-per-run 100 \
   --max-time-per-run 10
 ```
-**Note:** for Dedicated model, the `--api-model-name` is just a placeholder, the model depends on the the endpointId you provided
+**Note:** for Dedicated model, the `--api-model-name` is just a placeholder, the model depends on the endpointId you provided
 
 **Advanced features:**
 ```bash
@@ -343,7 +343,7 @@ vLLM and SGLang use OpenAI-compatible APIs with optional authentication.
 **Example:**
 ```bash
 genai-bench benchmark \
-  --api-backend vllm \
+  --api-backend sglang \
   --api-base http://localhost:8000 \
   --api-key optional-key \
   --api-model-name meta-llama/Llama-2-7b-hf \
@@ -657,4 +657,4 @@ The main changes are:
 
 - `--bucket` → `--storage-bucket`
 - `--prefix` → `--storage-prefix`
-- Add `--storage-provider oci` (though OCI is the default for backward compatibility)
+- Add `--storage-provider oci` (though OCI is the default for backward compatibility)

docs/user-guide/multi-cloud-quick-reference.md

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 This is a quick reference guide for common multi-cloud scenarios with genai-bench. For detailed information, see the [comprehensive guide](multi-cloud-auth-storage.md).
 
-> **Note**: For OpenAI, vLLM, and SGLang backends, both `--api-key` and `--model-api-key` are supported for backward compatibility.
+> **Note**: For OpenAI, SGLang and vLLM backends, both `--api-key` and `--model-api-key` are supported for backward compatibility.
 
 ## OpenAI Benchmarking
 
@@ -277,4 +277,4 @@ export GITHUB_REPO=benchmarks
 ```bash
 # HuggingFace (for downloading tokenizers)
 export HF_TOKEN=hf_...
-```
+```

docs/user-guide/run-benchmark.md

Lines changed: 4 additions & 4 deletions
@@ -21,12 +21,12 @@ export TRANSFORMERS_VERBOSITY=error
 genai-bench benchmark --api-backend openai \
   --api-base "http://localhost:8082" \
   --api-key "your-openai-api-key" \
-  --api-model-name "vllm-model" \
+  --api-model-name "meta-llama/Meta-Llama-3-70B-Instruct" \
   --model-tokenizer "/mnt/data/models/Meta-Llama-3.1-70B-Instruct" \
   --task text-to-text \
   --max-time-per-run 15 \
   --max-requests-per-run 300 \
-  --server-engine "vLLM" \
+  --server-engine "SGLang" \
   --server-gpu-type "H100" \
   --server-version "v0.6.0" \
   --server-gpu-count 4
@@ -119,7 +119,7 @@ genai-bench benchmark --api-backend oci-cohere \
   --api-base "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" \
   --api-model-name "c4ai-command-r-08-2024" \
   --model-tokenizer "/home/ubuntu/c4ai-command-r-08-2024" \
-  --server-engine "vLLM" \
+  --server-engine "SGLang" \
   --task text-to-text \
   --num-concurrency 1 \
   --server-gpu-type A100-80G \
@@ -344,4 +344,4 @@ If you want to benchmark a specific portion of a vision dataset, you can use the
 - Access to ALL HuggingFace `load_dataset` parameters
 - Reusable and version-controllable
 - Support for complex configurations
-- Future-proof (no CLI updates needed for new HuggingFace features)
+- Future-proof (no CLI updates needed for new HuggingFace features)

docs/user-guide/upload-benchmark-result.md

Lines changed: 3 additions & 3 deletions
@@ -16,12 +16,12 @@ To enable result uploading, use the following options with the `benchmark` comma
 genai-bench benchmark \
   --api-base "http://localhost:8082" \
   --api-key "your-openai-api-key" \
-  --api-model-name "vllm-model" \
+  --api-model-name "meta-llama/Meta-Llama-3-70B-Instruct" \
   --model-tokenizer "/mnt/data/models/Meta-Llama-3.1-70B-Instruct" \
   --task text-to-text \
   --max-time-per-run 15 \
   --max-requests-per-run 300 \
-  --server-engine "vLLM" \
+  --server-engine "SGLang" \
   --server-gpu-type "H100" \
   --server-version "v0.6.0" \
   --server-gpu-count 4 \
@@ -44,4 +44,4 @@ GenAI Bench now supports multiple cloud storage providers:
 - **GCP Cloud Storage**: Use `--storage-provider gcp`
 - **GitHub Releases**: Use `--storage-provider github`
 
-For detailed configuration and authentication options for each provider, please refer to the [Multi-Cloud Authentication & Storage Guide](multi-cloud-auth-storage.md).
+For detailed configuration and authentication options for each provider, please refer to the [Multi-Cloud Authentication & Storage Guide](multi-cloud-auth-storage.md).

examples/experiment_excel.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 LoggingManager("excel")
 
 
-folder_name = "/Users/changsu/openai_chat_vllm-model_tokenizer__mnt_data_models_Llama-3-70B-Instruct_20240904_003850"  # noqa: E501
+folder_name = "<Path to your experiment folder>"  # noqa: E501
 os.makedirs(folder_name, exist_ok=True)
 experiment_metadata, run_data = load_one_experiment(folder_name)
 create_workbook(

examples/experiment_plots.py

Lines changed: 6 additions & 6 deletions
@@ -6,16 +6,16 @@
     load_multiple_experiments,
     load_one_experiment,
 )
-from genai_bench.analysis.plot_report import plot_experiment_data
+from genai_bench.analysis.flexible_plot_report import plot_experiment_data_flexible
 from genai_bench.logging import LoggingManager
 
 LoggingManager("plot")
 
 
 # Example usage with filtering multiple experiments
-folder_name = "/Users/changsu/experiment_plot"
+folder_name = "<Path to the experiment folder>"
 filter_criteria = {
-    "model": "vllm-model",
+    "model": "Llama-4-Scout-17B-16E-Instruct",
 }
 
 os.makedirs(folder_name, exist_ok=True)
@@ -26,20 +26,20 @@
     print("Empty data after filtering")
 else:
     # Plot the data grouped by 'server_version'
-    plot_experiment_data(
+    plot_experiment_data_flexible(
         run_data_list, group_key="server_version", experiment_folder=folder_name
     )
 
 # Plot for one experiment
 experiment_folder = os.path.join(
     folder_name,
-    "openai_chat_vllm-model_tokenizer__mnt_data_models_Llama-3-70B-Instruct_20240904_003850",
+    "openai_SGLang_v0.4.7.post1_text-to-text_Llama-4-Scout-17B-16E-Instruct_20250620_042005",
 )
 experiment_metadata, run_data = load_one_experiment(experiment_folder)
 if not experiment_metadata or not run_data:
     print("Didn't find any experiment data")
 else:
-    plot_experiment_data(
+    plot_experiment_data_flexible(
         [
             [experiment_metadata, run_data],
         ],
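
Taken together, the two hunks above swap `plot_experiment_data` for `plot_experiment_data_flexible` in the example script. A minimal, self-contained sketch of the resulting call pattern is shown below; it reuses only names visible in the diff, the paths are placeholders, and the `group_key`/`experiment_folder` keyword arguments on the single-experiment call are assumptions borrowed from the multi-experiment call above rather than copied from the full file.

import os

from genai_bench.analysis.experiment_loader import load_one_experiment
from genai_bench.analysis.flexible_plot_report import plot_experiment_data_flexible
from genai_bench.logging import LoggingManager

LoggingManager("plot")

# Placeholder paths, as in the updated example; point these at a real experiment.
folder_name = "<Path to the experiment folder>"
experiment_folder = os.path.join(folder_name, "<experiment subfolder name>")

experiment_metadata, run_data = load_one_experiment(experiment_folder)
if not experiment_metadata or not run_data:
    print("Didn't find any experiment data")
else:
    # group_key/experiment_folder mirror the multi-experiment call (assumed here).
    plot_experiment_data_flexible(
        [
            [experiment_metadata, run_data],
        ],
        group_key="server_version",
        experiment_folder=experiment_folder,
    )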

genai_bench/cli/cli.py

Lines changed: 3 additions & 2 deletions
@@ -11,9 +11,10 @@
 from genai_bench.analysis.excel_report import create_workbook
 from genai_bench.analysis.experiment_loader import load_one_experiment
 from genai_bench.analysis.plot_report import (
-    plot_experiment_data,
     plot_single_scenario_inference_speed_vs_throughput,
 )
+from genai_bench.analysis.flexible_plot_report import plot_experiment_data_flexible
+
 from genai_bench.auth.unified_factory import UnifiedAuthFactory
 from genai_bench.cli.option_groups import (
     api_options,
@@ -506,7 +507,7 @@ def benchmark(
             ),
             percentile="mean",
         )
-        plot_experiment_data(
+        plot_experiment_data_flexible(
            [
                (experiment_metadata, run_data),
            ],

genai_bench/cli/report.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def excel(ctx, experiment_folder, excel_name, metric_percentile):
     default=None,
     callback=validate_filter_criteria,
     help="A dictionary containing filter criteria for the plot. Default: {}. "
-    "Example: '{'model': 'vllm-model'}'",
+    "Example: '{'model': 'meta-llama/Meta-Llama-3-70B-Instruct'}'",
 )
 @click.option(
     "--plot-config",

tests/analysis/mock_experiment_data.json

Lines changed: 4 additions & 4 deletions
@@ -1,12 +1,12 @@
 {
-  "cmd": "/home/changsu/miniconda3/bin/genai-bench --api_backend openai --api_base http://localhost:8082 --api_key your-openai-api-key --api_model_name vllm-model --model_tokenizer /mnt/data/models/Meta-Llama-3.1-70B-Instruct --task chat --run_time 1 --server_engine vLLM --server_gpu_type H100 --server_version v0.6.0 --server_gpu_count 4 --model Meta-Llama-3.1-70B-Instruct --num_concurrency (1, 2, 4, 8, 16, 32, 64, 128, 256) --traffic_scenario N(480,240)/(300,150) --traffic_scenario D(100,100) --traffic_scenario D(100,1000) --traffic_scenario D(2000,200) --traffic_scenario D(7800,200)",
+  "cmd": "/home/changsu/miniconda3/bin/genai-bench --api_backend openai --api_base http://localhost:8082 --api_key your-openai-api-key --api_model_name meta-llama/Meta-Llama-3-70B-Instruct --model_tokenizer /mnt/data/models/Meta-Llama-3.1-70B-Instruct --task chat --run_time 1 --server_engine SGLang --server_gpu_type H100 --server_version v0.6.0 --server_gpu_count 4 --model Meta-Llama-3.1-70B-Instruct --num_concurrency (1, 2, 4, 8, 16, 32, 64, 128, 256) --traffic_scenario N(480,240)/(300,150) --traffic_scenario D(100,100) --traffic_scenario D(100,1000) --traffic_scenario D(2000,200) --traffic_scenario D(7800,200)",
   "benchmark_version": "",
   "api_backend": "openai",
   "auth_config": {
     "api_base": "http://localhost:8084",
     "api_key": "********_key"
   },
-  "api_model_name": "vllm-model",
+  "api_model_name": "meta-llama/Meta-Llama-3-70B-Instruct",
   "server_model_tokenizer": "/mnt/data/models/Meta-Llama-3.1-70B-Instruct",
   "model": "Meta-Llama-3.1-70B-Instruct",
   "task": "text-to-text",
@@ -29,13 +29,13 @@
     "D(7800,200)"
   ],
   "additional_request_params": {},
-  "server_engine": "vLLM",
+  "server_engine": "SGLang",
   "server_version": "v0.6.0",
   "server_gpu_type": "H100",
   "server_gpu_count": "4",
   "max_time_per_run_s": 60,
   "max_requests_per_run": 300,
-  "experiment_folder_name": "/home/changsu/openai_vLLM_v0.6.0_chat_vllm-model_tokenizer__mnt_data_models_Meta-Llama-3.1-70B-Instruct_20240906_165433",
+  "experiment_folder_name": "/home/changsu/openai_SGLang_v0.6.0_chat_sglang-model_tokenizer__mnt_data_models_Meta-Llama-3.1-70B-Instruct_20240906_165433",
   "dataset_path": null,
   "dataset_prompt_column_index": 0,
   "character_token_ratio": 4.059085841694538
