Skip to content

Commit 940612d

Browse files
committed
add accuracy_sample_count
1 parent 8999c4d commit 940612d

File tree

5 files changed

+43
-10
lines changed

5 files changed

+43
-10
lines changed

loadgen/bindings/python_api.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
333333
&TestSettings::performance_issue_same_index)
334334
.def_readwrite("performance_sample_count_override",
335335
&TestSettings::performance_sample_count_override)
336+
.def_readwrite("accuracy_sample_count_override",
337+
&TestSettings::accuracy_sample_count_override)
336338
.def_readwrite("test05", &TestSettings::test05)
337339
.def_readwrite("test05_qsl_rng_seed", &TestSettings::test05_qsl_rng_seed)
338340
.def_readwrite("test05_sample_index_rng_seed",

loadgen/loadgen.cc

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -621,18 +621,23 @@ void LoadSamplesToRam(QuerySampleLibrary* qsl,
621621

622622
/// \brief Generates random sets of samples in the QSL that we can load into
623623
/// RAM at the same time.
624+
/// \param qsl The query sample library.
625+
/// \param settings The test settings.
626+
/// \param total_sample_count The total number of samples to generate indices for.
627+
/// In accuracy mode, this should be accuracy_sample_count.
628+
/// In performance mode, this should be performance_sample_count.
624629
std::vector<LoadableSampleSet> GenerateLoadableSets(
625-
QuerySampleLibrary* qsl, const TestSettingsInternal& settings) {
630+
QuerySampleLibrary* qsl, const TestSettingsInternal& settings,
631+
size_t total_sample_count) {
626632
auto tracer = MakeScopedTracer(
627633
[](AsyncTrace& trace) { trace("GenerateLoadableSets"); });
628634

629635
std::vector<LoadableSampleSet> result;
630636
std::mt19937 qsl_rng(settings.qsl_rng_seed);
631637

632-
// Generate indices for all available samples in the QSL.
633-
const size_t qsl_total_count = qsl->TotalSampleCount();
634-
std::vector<QuerySampleIndex> samples(qsl_total_count);
635-
for (size_t i = 0; i < qsl_total_count; i++) {
638+
// Generate indices for the specified sample count.
639+
std::vector<QuerySampleIndex> samples(total_sample_count);
640+
for (size_t i = 0; i < total_sample_count; i++) {
636641
samples[i] = static_cast<QuerySampleIndex>(i);
637642
}
638643

@@ -754,7 +759,8 @@ std::pair<PerformanceSummary, PerformanceSummary> FindBoundaries(
754759
});
755760

756761
std::vector<loadgen::LoadableSampleSet> loadable_sets(
757-
loadgen::GenerateLoadableSets(qsl, u_settings));
762+
loadgen::GenerateLoadableSets(qsl, u_settings,
763+
u_settings.performance_sample_count));
758764
const LoadableSampleSet& performance_set = loadable_sets.front();
759765
LoadSamplesToRam(qsl, performance_set.set);
760766

@@ -841,7 +847,8 @@ void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
841847

842848
// Use first loadable set as the performance set.
843849
std::vector<loadgen::LoadableSampleSet> loadable_sets(
844-
loadgen::GenerateLoadableSets(qsl, settings));
850+
loadgen::GenerateLoadableSets(qsl, settings,
851+
settings.performance_sample_count));
845852
const LoadableSampleSet& performance_set = loadable_sets.front();
846853
LoadSamplesToRam(qsl, performance_set.set);
847854

@@ -974,7 +981,8 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
974981
// 1. Check whether the lower bound that came from the user satisfies the performance
975982
// constraints or not.
976983
std::vector<loadgen::LoadableSampleSet> base_loadable_sets(
977-
loadgen::GenerateLoadableSets(qsl, base_settings));
984+
loadgen::GenerateLoadableSets(qsl, base_settings,
985+
base_settings.performance_sample_count));
978986
const LoadableSampleSet& base_performance_set = base_loadable_sets.front();
979987
LoadSamplesToRam(qsl, base_performance_set.set);
980988

@@ -1044,7 +1052,9 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
10441052

10451053
// Reuse performance_set, u_perf_summary has the largest 'samples_per_query'.
10461054
std::vector<loadgen::LoadableSampleSet> loadable_sets(
1047-
loadgen::GenerateLoadableSets(qsl, u_perf_summary.settings));
1055+
loadgen::GenerateLoadableSets(
1056+
qsl, u_perf_summary.settings,
1057+
u_perf_summary.settings.performance_sample_count));
10481058
const LoadableSampleSet& performance_set = loadable_sets.front();
10491059
LoadSamplesToRam(qsl, performance_set.set);
10501060

@@ -1089,7 +1099,8 @@ void RunAccuracyMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
10891099
});
10901100

10911101
std::vector<loadgen::LoadableSampleSet> loadable_sets(
1092-
loadgen::GenerateLoadableSets(qsl, settings));
1102+
loadgen::GenerateLoadableSets(qsl, settings,
1103+
settings.accuracy_sample_count));
10931104

10941105
for (auto& loadable_set : loadable_sets) {
10951106
{

loadgen/test_settings.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ struct TestSettings {
263263
uint64_t performance_issue_same_index = 0;
264264
/// \brief Overrides QSL->PerformanceSampleCount() when non-zero
265265
uint64_t performance_sample_count_override = 0;
266+
/// \brief Specifies the number of samples for accuracy evaluation.
267+
/// When non-zero, accuracy mode generates sample indices [0, accuracy_sample_count_override).
268+
/// When zero (default), uses performance_sample_count.
269+
uint64_t accuracy_sample_count_override = 0;
266270
/// \brief Measure token latencies
267271
bool use_token_latencies = false;
268272
/// Token latency parameters

loadgen/test_settings_internal.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ TestSettingsInternal::TestSettingsInternal(
124124
? qsl_performance_sample_count
125125
: requested.performance_sample_count_override;
126126

127+
// Accuracy Sample Count: Override -> performance_sample_count (default)
128+
accuracy_sample_count = (requested.accuracy_sample_count_override == 0)
129+
? performance_sample_count
130+
: requested.accuracy_sample_count_override;
131+
127132
// Sample by concatenating several permutations of the dataset
128133
// sample_concatenate_permutation
129134
sample_concatenate_permutation =
@@ -336,6 +341,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
336341
s.performance_issue_same_index);
337342
MLPERF_LOG(detail, "requested_performance_sample_count_override",
338343
s.performance_sample_count_override);
344+
MLPERF_LOG(detail, "requested_accuracy_sample_count_override",
345+
s.accuracy_sample_count_override);
339346
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
340347
s.sample_concatenate_permutation);
341348
// Token latencies specific values
@@ -407,6 +414,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
407414
detail("performance_issue_same_index : ", s.performance_issue_same_index);
408415
detail("performance_sample_count_override : ",
409416
s.performance_sample_count_override);
417+
detail("accuracy_sample_count_override : ",
418+
s.accuracy_sample_count_override);
410419
detail("");
411420
#endif
412421
});
@@ -450,6 +459,8 @@ void TestSettingsInternal::LogEffectiveSettings() const {
450459
s.performance_issue_same_index);
451460
MLPERF_LOG(detail, "effective_performance_sample_count",
452461
s.performance_sample_count);
462+
MLPERF_LOG(detail, "effective_accuracy_sample_count",
463+
s.accuracy_sample_count);
453464
MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
454465
s.sample_concatenate_permutation);
455466
#else
@@ -481,6 +492,7 @@ void TestSettingsInternal::LogEffectiveSettings() const {
481492
detail("performance_issue_same : ", s.performance_issue_same);
482493
detail("performance_issue_same_index : ", s.performance_issue_same_index);
483494
detail("performance_sample_count : ", s.performance_sample_count);
495+
detail("accuracy_sample_count : ", s.accuracy_sample_count);
484496
#endif
485497
});
486498
}
@@ -515,6 +527,7 @@ void TestSettingsInternal::LogSummary(AsyncSummary &summary) const {
515527
summary("performance_issue_same : ", performance_issue_same);
516528
summary("performance_issue_same_index : ", performance_issue_same_index);
517529
summary("performance_sample_count : ", performance_sample_count);
530+
summary("accuracy_sample_count : ", accuracy_sample_count);
518531
if (sample_concatenate_permutation) {
519532
summary(
520533
"WARNING: sample_concatenate_permutation was set to true. \n"
@@ -779,6 +792,8 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
779792
lookupkv(model, scenario, "max_query_count", &max_query_count, nullptr);
780793
lookupkv(model, scenario, "performance_sample_count_override",
781794
&performance_sample_count_override, nullptr);
795+
lookupkv(model, scenario, "accuracy_sample_count_override",
796+
&accuracy_sample_count_override, nullptr);
782797
lookupkv(model, "SingleStream", "target_latency", nullptr,
783798
&single_stream_expected_latency_ns, 1000 * 1000);
784799
lookupkv(model, "MultiStream", "target_latency", nullptr,

loadgen/test_settings_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ struct TestSettingsInternal {
8080
bool performance_issue_same;
8181
uint64_t performance_issue_same_index;
8282
uint64_t performance_sample_count;
83+
uint64_t accuracy_sample_count;
8384

8485
bool sample_concatenate_permutation;
8586
bool use_token_latencies = false;

0 commit comments

Comments
 (0)