 import time
 from typing import Any, Dict, List, Optional, Union
 
-import gin
-
 # pyre-ignore [21]
 import mlperf_loadgen as lg  # @manual
 import numpy as np
 
 USER_CONF = f"{os.path.dirname(__file__)}/user.conf"
 
-SUPPORTED_CONFIGS = {
-    "sampled-streaming-100b": "streaming_100b.gin",
-}
-
 
 SCENARIO_MAP = {  # pyre-ignore [5]
     "SingleStream": lg.TestScenario.SingleStream,
@@ -81,7 +75,49 @@ def get_args():  # pyre-ignore [3]
     """Parse commandline."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
-        "--dataset", default="debug", choices=SUPPORTED_DATASETS, help="dataset"
+        "--dataset", default="sampled-streaming-100b", choices=SUPPORTED_DATASETS, help="name of the dataset"
+    )
+    parser.add_argument(
+        "--model-path", default="", help="path to the model checkpoint. Example: /home/username/ckpts/streaming_100b/89/"
+    )
+    parser.add_argument(
+        "--scenario-name", default="Server", choices=["SingleStream", "MultiStream", "Server", "Offline"], help="inference benchmark scenario"
+    )
+    parser.add_argument(
+        "--batchsize", default=20, type=int, help="batch size used in the benchmark"
+    )
+    parser.add_argument(
+        "--output-trace", action="store_true", help="whether to output a trace"
+    )
+    parser.add_argument(
+        "--data-producer-threads", default=16, type=int, help="number of threads used in the data producer"
+    )
+    parser.add_argument(
+        "--compute-eval", action="store_true", help="if set, runs AccuracyOnly mode and outputs both predictions and labels for accuracy calculations"
+    )
+    parser.add_argument(
+        "--find-peak-performance", action="store_true", help="whether to search for peak performance in the benchmark"
+    )
+    parser.add_argument(
+        "--dataset-path-prefix", default="", help="prefix to the dataset path. Example: /home/username/"
+    )
+    parser.add_argument(
+        "--warmup-ratio", default=0.1, type=float, help="ratio of the dataset used to warm up the SUT"
+    )
+    parser.add_argument(
+        "--num-queries", default=500000, type=int, help="number of queries to run in the benchmark"
+    )
+    parser.add_argument(
+        "--target-qps", default=1500, type=int, help="benchmark target QPS; tune per implementation to balance latency and throughput"
+    )
+    parser.add_argument(
+        "--numpy-rand-seed", default=123, type=int, help="numpy random seed"
+    )
+    parser.add_argument(
+        "--sparse-quant", action="store_true", help="whether to quantize the sparse arch"
+    )
+    parser.add_argument(
+        "--dataset-percentage", default=0.001, type=float, help="fraction of the dataset to run in the benchmark (1.0 = full dataset)"
    )
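+    # NOTE: parse_known_args() tolerates unrecognized flags; unknown options are
+    # only logged below, so wrapper scripts can safely pass extras through.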
    args, unknown_args = parser.parse_known_args()
    logger.warning(f"unknown_args: {unknown_args}")
@@ -363,33 +399,24 @@ def get_item_count(self) -> int:
         return self.total_requests
 
 
-@gin.configurable
 def run(
     dataset: str = "debug",
     model_path: str = "",
     scenario_name: str = "Server",
     batchsize: int = 16,
-    out_dir: str = "",
     output_trace: bool = False,
     data_producer_threads: int = 4,
     compute_eval: bool = False,
     find_peak_performance: bool = False,
-    new_path_prefix: str = "",
-    train_split_percentage: float = 0.75,
+    dataset_path_prefix: str = "",
     warmup_ratio: float = 0.1,
-    # below will override mlperf rules compliant settings - don't use for official submission
-    duration: Optional[int] = None,
     target_qps: Optional[int] = None,
-    max_latency: Optional[float] = None,
     num_queries: Optional[int] = None,
-    samples_per_query_multistream: int = 8,
-    max_num_samples: int = -1,
     numpy_rand_seed: int = 123,
-    dev_mode: bool = False,
     sparse_quant: bool = False,
     dataset_percentage: float = 1.0,
 ) -> None:
-    set_dev_mode(dev_mode)
+    set_dev_mode(False)
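+    # dev_mode was dropped from the public interface; dev mode is now always
+    # disabled for benchmark runs.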
     if scenario_name not in SCENARIO_MAP:
         raise NotImplementedError("valid scanarios:" + str(list(SCENARIO_MAP.keys())))
     scenario = SCENARIO_MAP[scenario_name]
@@ -408,7 +435,7 @@ def run(
         compute_eval=compute_eval,
     )
     is_streaming: bool = "streaming" in dataset
-    dataset, kwargs = get_dataset(dataset, new_path_prefix)
+    dataset, kwargs = get_dataset(dataset, dataset_path_prefix)
 
     ds: Dataset = dataset(
         hstu_config=hstu_config,
@@ -430,11 +457,6 @@ def run(
         logger.error("{} not found".format(user_conf))
         sys.exit(1)
 
-    if out_dir:
-        output_dir = os.path.abspath(out_dir)
-        os.makedirs(output_dir, exist_ok=True)
-        os.chdir(output_dir)
-
     # warmup
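+    # Preload one batch before the timed loadgen run so one-time initialization
+    # costs do not show up in measured latency.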
     warmup_ids = list(range(batchsize))
     ds.load_query_samples(warmup_ids)
@@ -453,7 +475,7 @@ def run(
         if not is_streaming
         else ds.get_item_count()
     )
-    train_size: int = round(train_split_percentage * count) if not is_streaming else 0
+    train_size: int = 0
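+    # train_split_percentage is gone along with train/test splitting; the entire
+    # dataset now feeds the benchmark, so train_size stays 0.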
 
     settings = lg.TestSettings()
     settings.FromConfig(user_conf, model_path, scenario_name)
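+    # FromConfig(path, model, scenario) loads per-model, per-scenario overrides
+    # (target latency, min duration, query counts, etc.) from user.conf.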
@@ -489,21 +511,10 @@ def flush_queries() -> None:
     if find_peak_performance:
         settings.mode = lg.TestMode.FindPeakPerformance
 
-    if duration:
-        settings.min_duration_ms = duration
-        settings.max_duration_ms = duration
-
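+    # Loadgen reads the QPS target per scenario: Server uses server_target_qps as
+    # the Poisson arrival rate for incoming queries, and Offline uses
+    # offline_expected_qps to size its single large query.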
     if target_qps:
         settings.server_target_qps = float(target_qps)
         settings.offline_expected_qps = float(target_qps)
 
-    if samples_per_query_multistream:
-        settings.multi_stream_samples_per_query = samples_per_query_multistream
-
-    if max_latency:
-        settings.server_target_latency_ns = int(max_latency * NANO_SEC)
-        settings.multi_stream_expected_latency_ns = int(max_latency * NANO_SEC)
-
     # inference benchmark warmup
     if is_streaming:
         ds.init_sut()
@@ -549,7 +560,7 @@ def flush_queries() -> None:
     sut = lg.ConstructSUT(issue_queries, flush_queries)
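+    # ConstructQSL takes (total_sample_count, performance_sample_count,
+    # load_samples_to_ram, unload_samples_from_ram); with max_num_samples removed,
+    # the performance set is simply the full dataset.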
     qsl = lg.ConstructQSL(
         count,
-        min(count, max_num_samples) if max_num_samples > 0 else count,
+        count,
         load_query_samples,
         ds.unload_query_samples,
     )
@@ -572,18 +583,28 @@ def flush_queries() -> None:
     if int(os.environ.get("WORLD_SIZE", 1)) > 1:
         model_family.predict(None)
 
-    if out_dir:
-        with open("results.json", "w") as f:
-            json.dump(final_results, f, sort_keys=True, indent=4)
-
 
 def main() -> None:
     set_verbose_level(1)
     args = get_args()
     logger.info(args)
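+    # With the gin config files gone, every knob is forwarded explicitly from
+    # argparse into run().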
-    gin_path = f"{os.path.dirname(__file__)}/gin/{SUPPORTED_CONFIGS[args.dataset]}"
-    gin.parse_config_file(gin_path)
-    run(dataset=args.dataset)
+    run(
+        dataset=args.dataset,
+        model_path=args.model_path,
+        scenario_name=args.scenario_name,
+        batchsize=args.batchsize,
+        output_trace=args.output_trace,
+        data_producer_threads=args.data_producer_threads,
+        compute_eval=args.compute_eval,
+        find_peak_performance=args.find_peak_performance,
+        dataset_path_prefix=args.dataset_path_prefix,
+        warmup_ratio=args.warmup_ratio,
+        target_qps=args.target_qps,
+        num_queries=args.num_queries,
+        numpy_rand_seed=args.numpy_rand_seed,
+        sparse_quant=args.sparse_quant,
+        dataset_percentage=args.dataset_percentage,
+    )
 
 
 if __name__ == "__main__":