
Commit cb7db52

Add performance checks to the new submission checker
1 parent c31dc37 commit cb7db52

File tree

7 files changed: +301 -42 lines changed

tools/submission/submission_checker/checks/base.py

Lines changed: 7 additions & 9 deletions
@@ -20,24 +20,22 @@ def run_checks(self):
         """
         valid = True
         errors = []
-        for check in check:
-            v, msg = self.execute(check)
+        for check in self.checks:
+            v = self.execute(check)
             valid &= v
-            if not v:
-                errors.append(msg)
-        return valid, errors
+            if not valid:
+                return False
+        return valid

-    def execute(check):
+    def execute(self, check):
         return check()

     def __call__(self):
         """Allows the check instance to be called like a function."""
         self.log("Starting check...")
-        valid, errors = self.run_checks()
+        valid = self.run_checks()
         if valid:
             self.log.info("Checks passed")
         else:
             self.log.error("%s Checks failed", self.path)
-            for error in errors:
-                self.log.error(error)
         return valid
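For reference, a minimal sketch of how the reworked run_checks() behaves for a subclass. The ExampleCheck class, its check methods, and the assumption that BaseCheck's constructor stores log, path and an empty self.checks list are illustrative only, not part of this commit. Checks now run in registration order, each returns a single bool, and the first failure short-circuits the rest instead of collecting error messages; each check is expected to log its own errors (as PerformanceCheck below does).

from submission_checker.checks.base import BaseCheck  # import path assumed

class ExampleCheck(BaseCheck):
    """Hypothetical subclass used only to illustrate the new control flow."""

    def __init__(self, log, path):
        super().__init__(log, path)
        # Checks are registered in order; run_checks() stops at the first False.
        self.checks.append(self.always_passes)
        self.checks.append(self.always_fails)
        self.checks.append(self.never_reached)

    def always_passes(self):
        return True

    def always_fails(self):
        return False

    def never_reached(self):
        # Not executed: run_checks() returns False right after always_fails().
        return True

# valid = ExampleCheck(log, path)()  # __call__ now returns a single bool, no error list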
Lines changed: 225 additions & 5 deletions
@@ -1,10 +1,230 @@
 from .base import BaseCheck
+from ..constants import *
+from ..loader import SubmissionLogs
+from ..configuration.configuration import Config
+import os

 class PerformanceCheck(BaseCheck):
-    def __init__(self, log, path, parsed_log):
+    def __init__(self, log, path, config: Config, submission_logs: SubmissionLogs):
         super().__init__(log, path)
-        self.parsed_log = parsed_log
-        self.checks.append(self.sample_check)
+        self.submission_logs = submission_logs
+        self.mlperf_log = self.submission_logs.performance_log
+        self.system_json = self.submission_logs.system_json
+        self.config = config
+        self.model = self.submission_logs.loader_data.get("benchmark", "")
+        self.model_mapping = self.submission_logs.loader_data.get("model_mapping", {})
+        self.model = self.config.get_mlperf_model(self.model, self.model_mapping)
+        self.scenario = self.submission_logs.loader_data.get("scenario", "")
+        self.division = self.submission_logs.loader_data.get("division", "")
+        self.setup_checks()

-    def sample_check(self):
-        return True
+    def setup_checks(self):
+        self.checks.append(self.missing_check)
+        self.checks.append(self.loadgen_errors_check)
+        self.checks.append(self.equal_issue_check)
+        self.checks.append(self.performance_sample_count_check)
+        self.checks.append(self.seeds_check)
+        self.checks.append(self.latency_check)
+        self.checks.append(self.min_query_count_check)
+        self.checks.append(self.min_duration_check)
+        self.checks.append(self.network_check)
+
+    def missing_check(self):
+        if self.mlperf_log is None:
+            self.log.error("Performance log missing at %s", self.path)
+            return False
+        return True
+
+    def loadgen_errors_check(self):
+        if self.mlperf_log.has_error():
+            if self.config.ignore_uncommited:
+                has_other_errors = False
+                for error in self.mlperf_log.get_errors():
+                    if "Loadgen built with uncommitted changes!" not in error["value"]:
+                        has_other_errors = True
+            self.log.error("%s contains errors:", self.path)
+            for error in self.mlperf_log.get_errors():
+                self.log.error("%s", error["value"])
+
+            if not self.config.ignore_uncommited or has_other_errors:
+                self.log.error(
+                    "%s has loadgen errors, number of errors: %s", self.path, self.mlperf_log.num_errors()
+                )
+                return False
+        return True
+
+    def equal_issue_check(self):
+        if self.config.requires_equal_issue(self.model, self.division) and self.mlperf_log["effective_sample_concatenate_permutation"]:
+            self.log.error("%s requires equal issue mode (sample_concatenate_permutation), expected=true, found=false", self.path)
+            return False
+        return True
+
+    def performance_sample_count_check(self):
+        required_performance_sample_count = self.config.get_performance_sample_count(self.model)
+        performance_sample_count = self.mlperf_log["effective_performance_sample_count"]
+        if performance_sample_count < required_performance_sample_count:
+            self.log.error(
+                "%s performance_sample_count, found %d, needs to be >= %d",
+                self.path,
+                performance_sample_count,
+                required_performance_sample_count,
+            )
+            return False
+        return True
+
+    def seeds_check(self):
+        config_seeds = self.config.seeds
+        qsl_rng_seed = self.mlperf_log["effective_qsl_rng_seed"]
+        sample_index_rng_seed = self.mlperf_log["effective_sample_index_rng_seed"]
+        schedule_rng_seed = self.mlperf_log["effective_schedule_rng_seed"]
+        is_valid = True
+        if qsl_rng_seed != config_seeds["qsl_rng_seed"]:
+            self.log.error(
+                "%s qsl_rng_seed is wrong, expected=%s, found=%s",
+                self.path,
+                config_seeds["qsl_rng_seed"],
+                qsl_rng_seed,
+            )
+            is_valid = False
+        if sample_index_rng_seed != config_seeds["sample_index_rng_seed"]:
+            self.log.error(
+                "%s sample_index_rng_seed is wrong, expected=%s, found=%s",
+                self.path,
+                config_seeds["sample_index_rng_seed"],
+                sample_index_rng_seed,
+            )
+            is_valid = False
+        if schedule_rng_seed != config_seeds["schedule_rng_seed"]:
+            self.log.error(
+                "%s schedule_rng_seed is wrong, expected=%s, found=%s",
+                self.path,
+                config_seeds["schedule_rng_seed"],
+                schedule_rng_seed,
+            )
+            is_valid = False
+        return is_valid
+
+    def latency_check(self):
+        uses_early_stopping = self.config.uses_early_stopping(self.scenario)
+        if uses_early_stopping:
+            # check if early_stopping condition was met
+            if not self.mlperf_log["early_stopping_met"]:
+                early_stopping_result = self.mlperf_log["early_stopping_result"]
+                self.log.error(
+                    "Early stopping condition was not met, msg=%s",
+                    early_stopping_result,
+                )
+                return False
+            # If the scenario has a target latency (Server scenario), check
+            # that the target latency that was passed to the early stopping
+            # is less than the target latency.
+            target_latency = self.config.latency_constraint.get(
+                self.model, dict()).get(self.scenario)
+            if target_latency:
+                early_stopping_latency_ns = self.mlperf_log["effective_target_latency_ns"]
+                self.log.info(
+                    "Target latency: %s, Early Stopping Latency: %s, Scenario: %s",
+                    target_latency,
+                    early_stopping_latency_ns,
+                    self.scenario,
+                )
+                if early_stopping_latency_ns > target_latency:
+                    self.log.error(
+                        "%s Latency constraint with early stopping not met, expected=%s, found=%s",
+                        self.path,
+                        target_latency,
+                        early_stopping_latency_ns,
+                    )
+                    return False
+        else:
+            # check if the benchmark meets latency constraint
+            latency_99_percentile = self.mlperf_log["result_99.00_percentile_latency_ns"]
+            target_latency = self.config.latency_constraint.get(
+                self.model, dict()).get(self.scenario)
+            self.log.info(
+                "Target latency: %s, Latency: %s, Scenario: %s",
+                target_latency,
+                latency_99_percentile,
+                self.scenario,
+            )
+            if target_latency:
+                if latency_99_percentile > target_latency:
+                    self.log.error(
+                        "%s Latency constraint not met, expected=%s, found=%s",
+                        self.path,
+                        target_latency,
+                        latency_99_percentile,
+                    )
+                    return False
+        return True
+
+    def min_query_count_check(self):
+        uses_early_stopping = self.config.uses_early_stopping(self.scenario)
+        min_query_count = self.mlperf_log["effective_min_query_count"]
+        samples_per_query = self.mlperf_log["effective_samples_per_query"]
+        if not uses_early_stopping:
+            required_min_query_count = self.config.get_min_query_count(self.model, self.scenario)
+            if required_min_query_count and min_query_count < required_min_query_count:
+                self.log.error(
+                    "%s Required minimum Query Count not met by user config, Expected=%s, Found=%s",
+                    self.path,
+                    required_min_query_count,
+                    min_query_count,
+                )
+                return False
+        if self.scenario.lower() == "offline" and (
+                samples_per_query < OFFLINE_MIN_SPQ_SINCE_V4[self.model]) and self.division.lower() == "closed":
+            self.log.error(
+                "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s",
+                self.path,
+                OFFLINE_MIN_SPQ_SINCE_V4[self.model],
+                samples_per_query,
+            )
+            return False
+        return True
+
+    def min_duration_check(self):
+        required_min_duration = TEST_DURATION_MS
+        min_duration = self.mlperf_log["effective_min_duration_ms"]
+        if min_duration < required_min_duration:
+            self.log.error(
+                "%s Test duration less than 600s in user config. expected=%s, found=%s",
+                self.path,
+                required_min_duration,
+                min_duration,
+            )
+            return False
+        return True
+
+    def network_check(self):
+        is_network_mode_sys_spec_str = self.system_json.get(SYSTEM_DESC_IS_NETWORK_MODE)
+        is_network_system = (
+            is_network_mode_sys_spec_str.lower() == "true"
+            if is_network_mode_sys_spec_str is not None
+            else False
+        )
+        # verify that the system corresponds the division
+        is_valid = True
+        expected_state_by_division = {"network": True, "closed": False}
+        if self.division in expected_state_by_division:
+            is_valid = expected_state_by_division[self.division] is is_network_system
+        if not is_valid:
+            self.log.error(
+                f"{self.path} incorrect network mode (={is_network_system}) for division '{self.division}'"
+            )
+            return False
+
+        sut_name = self.mlperf_log["sut_name"]
+        if is_network_system:
+            # for network mode verify the SUT name is valid, according to the rules
+            # (must include "Network SUT" in name)
+            if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name:
+                self.log.error(
+                    f"{self.path} invalid sut name for network mode. expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'"
+                )
+                return False
+
+        return True
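To make the seed comparison above concrete, here is a standalone sketch of the same logic over plain dicts. The effective_* field names come from the diff; the seeds_match helper and the example values are made up for illustration.

def seeds_match(effective: dict, expected: dict) -> bool:
    """Return True when every effective loadgen seed equals the expected config seed."""
    ok = True
    for key in ("qsl_rng_seed", "sample_index_rng_seed", "schedule_rng_seed"):
        found = effective[f"effective_{key}"]
        if found != expected[key]:
            print(f"{key} is wrong, expected={expected[key]}, found={found}")
            ok = False
    return ok

# Example with made-up seed values: the schedule seed mismatch makes it return False.
seeds_match(
    {"effective_qsl_rng_seed": 1, "effective_sample_index_rng_seed": 2, "effective_schedule_rng_seed": 3},
    {"qsl_rng_seed": 1, "sample_index_rng_seed": 2, "schedule_rng_seed": 4},
)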

tools/submission/submission_checker/configuration/configuration.py

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,6 @@ def load_config(self, version):
         # TODO: Load values from
         self.models = self.base["models"]
         self.seeds = self.base["seeds"]
-        self.test05_seeds = self.base["test05_seeds"]
         self.accuracy_target = self.base["accuracy-target"]
         self.accuracy_delta_perc = self.base["accuracy-delta-perc"]
         self.accuracy_upper_limit = self.base.get("accuracy-upper-limit", {})
@@ -73,6 +72,8 @@ def get_mlperf_model(self, model, extra_model_mapping=None):
             model = "bert-99.9"
         elif "bert-99" in model:
             model = "bert-99"
+        elif "llama3_1-405b" in model:
+            model = "llama3.1-405b"
         # map again
         mlperf_model = self.base["model_mapping"].get(model, model)
         return mlperf_model
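A minimal stand-in for the substring normalisation step touched above, showing the effect of the new branch. The real get_mlperf_model also applies self.base["model_mapping"] and any per-submitter extra mapping afterwards, which this sketch omits; the function name and the example suffix are made up.

def normalise_model_name(model: str) -> str:
    # Mirrors the elif chain in get_mlperf_model(); only the branches visible in the diff are included.
    if "bert-99.9" in model:
        return "bert-99.9"
    if "bert-99" in model:
        return "bert-99"
    if "llama3_1-405b" in model:
        return "llama3.1-405b"
    return model

print(normalise_model_name("llama3_1-405b-fp8"))  # -> "llama3.1-405b"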

tools/submission/submission_checker/constants.py

Lines changed: 9 additions & 0 deletions
@@ -693,6 +693,7 @@
     "rgat": 788379,
     "deepseek-r1": 4388,
     "whisper": 1633,
+    "pointpainting": 24576,
 }

 SCENARIO_MAPPING = {
@@ -1049,4 +1050,12 @@
     "v5.1": "{division}/{submitter}/compliance/{system}/{benchmark}/{scenario}/TEST06/accuracy/mlperf_log_detail.txt",
     "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST06/accuracy/mlperf_log_detail.txt",
     "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST06/accuracy/mlperf_log_detail.txt",
+}
+
+SYSTEM_PATH = {
+    "v5.0": "{division}/{submitter}/systems/{system}.json",
+    "v5.1": "{division}/{submitter}/systems/{system}.json",
+    "v6.0": "{division}/{submitter}/systems/{system}.json",
+    "default": "{division}/{submitter}/systems/{system}.json",
 }
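The new SYSTEM_PATH template is consumed by the loader below. As a quick illustration of how such a template expands (an abridged copy of the constant; the division, submitter, and system values are placeholders):

SYSTEM_PATH = {
    "v5.0": "{division}/{submitter}/systems/{system}.json",
    "default": "{division}/{submitter}/systems/{system}.json",
}

# Same .get(version, default) + .format() pattern the Loader uses for its other path templates.
path = SYSTEM_PATH.get("v6.0", SYSTEM_PATH["default"]).format(
    division="closed", submitter="ExampleOrg", system="example_system"
)
print(path)  # closed/ExampleOrg/systems/example_system.json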

tools/submission/submission_checker/loader.py

Lines changed: 28 additions & 6 deletions
@@ -1,9 +1,10 @@
 import os
-from .constants import PERFORMANCE_LOG_PATH, PERFORMANCE_SUMMARY_PATH, ACCURACY_LOG_PATH, VALID_DIVISIONS
+from .constants import PERFORMANCE_LOG_PATH, PERFORMANCE_SUMMARY_PATH, ACCURACY_LOG_PATH, SYSTEM_PATH, VALID_DIVISIONS
 from .utils import list_dir
 from .parsers.loadgen_parser import LoadgenParser
 from typing import Generator, Literal
 import logging
+import json

 logging.basicConfig(
     level=logging.INFO,
@@ -12,9 +13,11 @@


 class SubmissionLogs:
-    def __init__(self, performance_log, accuracy_log) -> None:
+    def __init__(self, performance_log, accuracy_log, system_json, loader_data = {}) -> None:
         self.performance_log = performance_log
         self.accuracy_log = accuracy_log
+        self.system_json = system_json
+        self.loader_data = loader_data


 class Loader:
@@ -25,12 +28,17 @@ def __init__(self, root, version) -> None:
         self.perf_log_path = os.path.join(self.root, PERFORMANCE_LOG_PATH.get(version, PERFORMANCE_LOG_PATH["default"]))
         self.perf_summary_path = os.path.join(self.root, PERFORMANCE_SUMMARY_PATH.get(version, PERFORMANCE_SUMMARY_PATH["default"]))
         self.acc_log_path = os.path.join(self.root, ACCURACY_LOG_PATH.get(version, ACCURACY_LOG_PATH["default"]))
+        self.system_log_path = os.path.join(self.root, SYSTEM_PATH.get(version, SYSTEM_PATH["default"]))

-    def load_single_log(self, path, log_type: Literal["Performance", "Accuracy", "Test"]):
+    def load_single_log(self, path, log_type: Literal["Performance", "Accuracy", "Test", "System"]):
         log = None
         if os.path.exists(path):
             self.logger.info("Loading %s log from %s", log_type, path)
-            log = LoadgenParser(path)
+            if log_type in ["Performance", "Accuracy", "Test"]:
+                log = LoadgenParser(path)
+            if log_type in ["System"]:
+                with open(path) as f:
+                    log = json.load(f)
         else:
             self.logger.info("Could not load %s log from %s, path does not exist", log_type, path)
         return log
@@ -43,14 +51,28 @@ def load(self) -> Generator[SubmissionLogs, None, None]:
             division_path = os.path.join(self.root, division)
             for submitter in list_dir(division_path):
                 results_path = os.path.join(division_path, submitter, "results")
+                model_mapping = {}
+                if division == "open" and os.path.exists(os.path.join(division_path, submitter, "model_mapping.json")):
+                    model_mapping = self.load_single_log(os.path.join(division_path, submitter, "model_mapping.json"), "System")
                 for system in list_dir(results_path):
                     system_path = os.path.join(results_path, system)
+                    system_json_path = self.system_log_path.format(division = division, submitter = submitter, system = system)
+                    system_json = self.load_single_log(system_json_path, "System")
                     for benchmark in list_dir(system_path):
                         benchmark_path = os.path.join(system_path, benchmark)
                         for scenario in list_dir(benchmark_path):
                             scenario_path = os.path.join(benchmark_path, benchmark)
                             perf_path = self.perf_log_path.format(division = division, submitter = submitter, system = system, benchmark = benchmark, scenario = scenario)
                             acc_path = self.acc_log_path.format(division = division, submitter = submitter, system = system, benchmark = benchmark, scenario = scenario)
                             perf_log = self.load_single_log(perf_path, "Performance")
-                            acc_log = perf_log = self.load_single_log(acc_path, "Accuracy")
-                            yield SubmissionLogs(perf_log, acc_log)
+                            acc_log = self.load_single_log(acc_path, "Accuracy")
+                            loader_data = {
+                                "division": division,
+                                "submitter": submitter,
+                                "system": system,
+                                "benchmark": benchmark,
+                                "scenario": scenario,
+                                "perf_path": perf_path,
+                                "model_mapping": model_mapping,
+                            }
+                            yield SubmissionLogs(perf_log, acc_log, system_json, loader_data)
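Putting the pieces together, a hedged end-to-end sketch of how the updated Loader and the new PerformanceCheck are expected to cooperate. The import paths, the Config constructor signature, the checks module name, the submissions root, and the CheckLogger wrapper are assumptions; only the Loader.load() / SubmissionLogs / PerformanceCheck interfaces come from this commit.

import logging

from submission_checker.configuration.configuration import Config   # constructor signature assumed
from submission_checker.loader import Loader
from submission_checker.checks.performance import PerformanceCheck  # module name assumed

class CheckLogger:
    # base.py calls self.log(...) directly as well as self.log.info()/.error(),
    # so wrap a standard Logger to support both call styles (interface assumed).
    def __init__(self, logger):
        self._logger = logger
        self.info = logger.info
        self.error = logger.error

    def __call__(self, msg, *args):
        self._logger.info(msg, *args)

logging.basicConfig(level=logging.INFO)
log = CheckLogger(logging.getLogger("submission-checker"))

config = Config("v6.0")                         # version string is a placeholder
loader = Loader("path/to/submissions", "v6.0")  # root directory is a placeholder

all_valid = True
for submission_logs in loader.load():
    perf_path = submission_logs.loader_data["perf_path"]
    all_valid &= PerformanceCheck(log, perf_path, config, submission_logs)()

print("performance checks passed" if all_valid else "performance checks failed")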
