from .base import BaseCheck
from ..constants import *
from ..loader import SubmissionLogs
from ..configuration.configuration import Config


class PerformanceCheck(BaseCheck):
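    """Validate the performance run of a single MLPerf submission result.

    Each ``*_check`` method returns True when its constraint holds, and
    logs an error and returns False otherwise.
    """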
    def __init__(self, log, path, config: Config, submission_logs: SubmissionLogs):
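        """Resolve model/scenario/division metadata from the loaded logs and register the checks."""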
        super().__init__(log, path)
        self.submission_logs = submission_logs
        self.mlperf_log = self.submission_logs.performance_log
        self.system_json = self.submission_logs.system_json
        self.config = config
        self.model = self.submission_logs.loader_data.get("benchmark", "")
        self.model_mapping = self.submission_logs.loader_data.get("model_mapping", {})
        self.model = self.config.get_mlperf_model(self.model, self.model_mapping)
        self.scenario = self.submission_logs.loader_data.get("scenario", "")
        self.division = self.submission_logs.loader_data.get("division", "")
        self.setup_checks()

    def setup_checks(self):
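        """Register every performance check in the order it should run."""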
        self.checks.append(self.missing_check)
        self.checks.append(self.loadgen_errors_check)
        self.checks.append(self.equal_issue_check)
        self.checks.append(self.performance_sample_count_check)
        self.checks.append(self.seeds_check)
        self.checks.append(self.latency_check)
        self.checks.append(self.min_query_count_check)
        self.checks.append(self.min_duration_check)
        self.checks.append(self.network_check)

    def missing_check(self):
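        """Fail when no performance log was found for this result."""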
        if self.mlperf_log is None:
            self.log.error("Performance log missing at %s", self.path)
            return False
        return True

    def loadgen_errors_check(self):
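        """Fail on loadgen errors, optionally tolerating the uncommitted-changes warning.

        When ``config.ignore_uncommited`` is set, the "Loadgen built with
        uncommitted changes!" message alone does not fail the check.
        """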
        if self.mlperf_log.has_error():
            # Initialize before the branch so the final condition never
            # depends on short-circuit evaluation to avoid a NameError.
            has_other_errors = False
            if self.config.ignore_uncommited:
                for error in self.mlperf_log.get_errors():
                    if "Loadgen built with uncommitted changes!" not in error["value"]:
                        has_other_errors = True

            self.log.error("%s contains errors:", self.path)
            for error in self.mlperf_log.get_errors():
                self.log.error("%s", error["value"])

            if not self.config.ignore_uncommited or has_other_errors:
                self.log.error(
                    "%s has loadgen errors, number of errors: %s",
                    self.path,
                    self.mlperf_log.num_errors(),
                )
                return False
        return True

    def equal_issue_check(self):
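        """Fail when equal issue mode is required but was not enabled.

        Equal issue mode corresponds to loadgen's
        ``sample_concatenate_permutation`` setting.
        """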
        if self.config.requires_equal_issue(
            self.model, self.division
        ) and not self.mlperf_log["effective_sample_concatenate_permutation"]:
            self.log.error(
                "%s requires equal issue mode (sample_concatenate_permutation), expected=true, found=false",
                self.path,
            )
            return False
        return True

    def performance_sample_count_check(self):
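        """Fail when the effective performance sample count is below the model's required minimum."""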
        required_performance_sample_count = self.config.get_performance_sample_count(self.model)
        performance_sample_count = self.mlperf_log["effective_performance_sample_count"]
        if performance_sample_count < required_performance_sample_count:
            self.log.error(
                "%s performance_sample_count, found %d, needs to be >= %d",
                self.path,
                performance_sample_count,
                required_performance_sample_count,
            )
            return False
        return True

    def seeds_check(self):
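        """Fail when any effective RNG seed differs from the seeds required by the config."""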
        config_seeds = self.config.seeds
        qsl_rng_seed = self.mlperf_log["effective_qsl_rng_seed"]
        sample_index_rng_seed = self.mlperf_log["effective_sample_index_rng_seed"]
        schedule_rng_seed = self.mlperf_log["effective_schedule_rng_seed"]
        is_valid = True
        if qsl_rng_seed != config_seeds["qsl_rng_seed"]:
            self.log.error(
                "%s qsl_rng_seed is wrong, expected=%s, found=%s",
                self.path,
                config_seeds["qsl_rng_seed"],
                qsl_rng_seed,
            )
            is_valid = False
        if sample_index_rng_seed != config_seeds["sample_index_rng_seed"]:
            self.log.error(
                "%s sample_index_rng_seed is wrong, expected=%s, found=%s",
                self.path,
                config_seeds["sample_index_rng_seed"],
                sample_index_rng_seed,
            )
            is_valid = False
        if schedule_rng_seed != config_seeds["schedule_rng_seed"]:
            self.log.error(
                "%s schedule_rng_seed is wrong, expected=%s, found=%s",
                self.path,
                config_seeds["schedule_rng_seed"],
                schedule_rng_seed,
            )
            is_valid = False
        return is_valid

    def latency_check(self):
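        """Validate the latency constraints for the run.

        With early stopping, verify that the early stopping condition was met
        and, for scenarios with a target latency (e.g. Server), that the
        effective target latency does not exceed the constraint. Without early
        stopping, compare the 99th percentile latency against the constraint
        directly.
        """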
        uses_early_stopping = self.config.uses_early_stopping(self.scenario)
        if uses_early_stopping:
            # Check whether the early stopping condition was met.
            if not self.mlperf_log["early_stopping_met"]:
                early_stopping_result = self.mlperf_log["early_stopping_result"]
                self.log.error(
                    "Early stopping condition was not met, msg=%s",
                    early_stopping_result,
                )
                return False
            # If the scenario has a target latency (e.g. Server), check that
            # the effective target latency used with early stopping does not
            # exceed the constraint.
            target_latency = self.config.latency_constraint.get(self.model, {}).get(self.scenario)
            if target_latency:
                early_stopping_latency_ns = self.mlperf_log["effective_target_latency_ns"]
                self.log.info(
                    "Target latency: %s, Early Stopping Latency: %s, Scenario: %s",
                    target_latency,
                    early_stopping_latency_ns,
                    self.scenario,
                )
                if early_stopping_latency_ns > target_latency:
                    self.log.error(
                        "%s Latency constraint with early stopping not met, expected=%s, found=%s",
                        self.path,
                        target_latency,
                        early_stopping_latency_ns,
                    )
                    return False
        else:
            # Without early stopping, check the measured 99th percentile
            # latency against the constraint.
            latency_99_percentile = self.mlperf_log["result_99.00_percentile_latency_ns"]
            target_latency = self.config.latency_constraint.get(self.model, {}).get(self.scenario)
            self.log.info(
                "Target latency: %s, Latency: %s, Scenario: %s",
                target_latency,
                latency_99_percentile,
                self.scenario,
            )
            if target_latency:
                if latency_99_percentile > target_latency:
                    self.log.error(
                        "%s Latency constraint not met, expected=%s, found=%s",
                        self.path,
                        target_latency,
                        latency_99_percentile,
                    )
                    return False
        return True

    def min_query_count_check(self):
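        """Validate the minimum query count and samples per query.

        The fixed minimum query count is only enforced when the scenario does
        not use early stopping; the Offline samples-per-query floor applies to
        Closed-division results regardless.
        """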
        uses_early_stopping = self.config.uses_early_stopping(self.scenario)
        min_query_count = self.mlperf_log["effective_min_query_count"]
        samples_per_query = self.mlperf_log["effective_samples_per_query"]
        if not uses_early_stopping:
            required_min_query_count = self.config.get_min_query_count(self.model, self.scenario)
            if required_min_query_count and min_query_count < required_min_query_count:
                self.log.error(
                    "%s Required minimum query count not met by user config, expected=%s, found=%s",
                    self.path,
                    required_min_query_count,
                    min_query_count,
                )
                return False
        if (
            self.scenario.lower() == "offline"
            and self.division.lower() == "closed"
            and samples_per_query < OFFLINE_MIN_SPQ_SINCE_V4[self.model]
        ):
            self.log.error(
                "%s Required minimum samples per query not met by user config, expected=%s, found=%s",
                self.path,
                OFFLINE_MIN_SPQ_SINCE_V4[self.model],
                samples_per_query,
            )
            return False
        return True

    def min_duration_check(self):
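        """Fail when the configured minimum test duration is below the required duration."""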
        required_min_duration = TEST_DURATION_MS
        min_duration = self.mlperf_log["effective_min_duration_ms"]
        if min_duration < required_min_duration:
            self.log.error(
                "%s Test duration less than 600s in user config, expected=%s, found=%s",
                self.path,
                required_min_duration,
                min_duration,
            )
            return False
        return True

    def network_check(self):
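        """Validate that the system's network mode matches its division.

        Network-division results must run on a network system whose SUT name
        contains the required substring; Closed-division results must not run
        in network mode.
        """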
        is_network_mode_sys_spec_str = self.system_json.get(SYSTEM_DESC_IS_NETWORK_MODE)
        is_network_system = (
            is_network_mode_sys_spec_str.lower() == "true"
            if is_network_mode_sys_spec_str is not None
            else False
        )
        # Verify that the network mode of the system corresponds to the
        # division; compare case-insensitively for consistency with the
        # division checks above.
        expected_state_by_division = {"network": True, "closed": False}
        division = self.division.lower()
        if division in expected_state_by_division:
            if expected_state_by_division[division] is not is_network_system:
                self.log.error(
                    f"{self.path} incorrect network mode (={is_network_system}) for division '{self.division}'"
                )
                return False

        sut_name = self.mlperf_log["sut_name"]
        if is_network_system:
            # For network mode, the rules require the SUT name to include
            # "Network SUT".
            if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name:
                self.log.error(
                    f"{self.path} invalid sut name for network mode, expected substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}', got '{sut_name}'"
                )
                return False

        return True