From 940612dd1556917a7448948565ff6b25e8d851d5 Mon Sep 17 00:00:00 2001 From: Shobhit Verma Date: Wed, 17 Dec 2025 13:13:06 +0000 Subject: [PATCH 1/4] add accuracy_sample_count --- loadgen/bindings/python_api.cc | 2 ++ loadgen/loadgen.cc | 31 +++++++++++++++++++++---------- loadgen/test_settings.h | 4 ++++ loadgen/test_settings_internal.cc | 15 +++++++++++++++ loadgen/test_settings_internal.h | 1 + 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/loadgen/bindings/python_api.cc b/loadgen/bindings/python_api.cc index 96396dab92..1b6f9034ec 100644 --- a/loadgen/bindings/python_api.cc +++ b/loadgen/bindings/python_api.cc @@ -333,6 +333,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) { &TestSettings::performance_issue_same_index) .def_readwrite("performance_sample_count_override", &TestSettings::performance_sample_count_override) + .def_readwrite("accuracy_sample_count_override", + &TestSettings::accuracy_sample_count_override) .def_readwrite("test05", &TestSettings::test05) .def_readwrite("test05_qsl_rng_seed", &TestSettings::test05_qsl_rng_seed) .def_readwrite("test05_sample_index_rng_seed", diff --git a/loadgen/loadgen.cc b/loadgen/loadgen.cc index 42b2140de2..28662a32c1 100644 --- a/loadgen/loadgen.cc +++ b/loadgen/loadgen.cc @@ -621,18 +621,23 @@ void LoadSamplesToRam(QuerySampleLibrary* qsl, /// \brief Generates random sets of samples in the QSL that we can load into /// RAM at the same time. +/// \param qsl The query sample library. +/// \param settings The test settings. +/// \param total_sample_count The total number of samples to generate indices for. +/// In accuracy mode, this should be accuracy_sample_count. +/// In performance mode, this should be performance_sample_count. std::vector GenerateLoadableSets( - QuerySampleLibrary* qsl, const TestSettingsInternal& settings) { + QuerySampleLibrary* qsl, const TestSettingsInternal& settings, + size_t total_sample_count) { auto tracer = MakeScopedTracer( [](AsyncTrace& trace) { trace("GenerateLoadableSets"); }); std::vector result; std::mt19937 qsl_rng(settings.qsl_rng_seed); - // Generate indices for all available samples in the QSL. - const size_t qsl_total_count = qsl->TotalSampleCount(); - std::vector samples(qsl_total_count); - for (size_t i = 0; i < qsl_total_count; i++) { + // Generate indices for the specified sample count. + std::vector samples(total_sample_count); + for (size_t i = 0; i < total_sample_count; i++) { samples[i] = static_cast(i); } @@ -754,7 +759,8 @@ std::pair FindBoundaries( }); std::vector loadable_sets( - loadgen::GenerateLoadableSets(qsl, u_settings)); + loadgen::GenerateLoadableSets(qsl, u_settings, + u_settings.performance_sample_count)); const LoadableSampleSet& performance_set = loadable_sets.front(); LoadSamplesToRam(qsl, performance_set.set); @@ -841,7 +847,8 @@ void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl, // Use first loadable set as the performance set. std::vector loadable_sets( - loadgen::GenerateLoadableSets(qsl, settings)); + loadgen::GenerateLoadableSets(qsl, settings, + settings.performance_sample_count)); const LoadableSampleSet& performance_set = loadable_sets.front(); LoadSamplesToRam(qsl, performance_set.set); @@ -974,7 +981,8 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl, // 1. Check whether the lower bound came from user satisfy performance // constraints or not. std::vector base_loadable_sets( - loadgen::GenerateLoadableSets(qsl, base_settings)); + loadgen::GenerateLoadableSets(qsl, base_settings, + base_settings.performance_sample_count)); const LoadableSampleSet& base_performance_set = base_loadable_sets.front(); LoadSamplesToRam(qsl, base_performance_set.set); @@ -1044,7 +1052,9 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl, // Reuse performance_set, u_perf_summary has the largest 'samples_per_query'. std::vector loadable_sets( - loadgen::GenerateLoadableSets(qsl, u_perf_summary.settings)); + loadgen::GenerateLoadableSets( + qsl, u_perf_summary.settings, + u_perf_summary.settings.performance_sample_count)); const LoadableSampleSet& performance_set = loadable_sets.front(); LoadSamplesToRam(qsl, performance_set.set); @@ -1089,7 +1099,8 @@ void RunAccuracyMode(SystemUnderTest* sut, QuerySampleLibrary* qsl, }); std::vector loadable_sets( - loadgen::GenerateLoadableSets(qsl, settings)); + loadgen::GenerateLoadableSets(qsl, settings, + settings.accuracy_sample_count)); for (auto& loadable_set : loadable_sets) { { diff --git a/loadgen/test_settings.h b/loadgen/test_settings.h index 584d073bb8..624b9cd99f 100644 --- a/loadgen/test_settings.h +++ b/loadgen/test_settings.h @@ -263,6 +263,10 @@ struct TestSettings { uint64_t performance_issue_same_index = 0; /// \brief Overrides QSL->PerformanceSampleCount() when non-zero uint64_t performance_sample_count_override = 0; + /// \brief Specifies the number of samples for accuracy evaluation. + /// When non-zero, accuracy mode generates sample indices [0, accuracy_sample_count_override). + /// When zero (default), uses performance_sample_count. + uint64_t accuracy_sample_count_override = 0; /// \brief Measure token latencies bool use_token_latencies = false; /// Token latency parameters diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc index 3f2cd88473..2029ea0c96 100644 --- a/loadgen/test_settings_internal.cc +++ b/loadgen/test_settings_internal.cc @@ -124,6 +124,11 @@ TestSettingsInternal::TestSettingsInternal( ? qsl_performance_sample_count : requested.performance_sample_count_override; + // Accuracy Sample Count: Override -> performance_sample_count (default) + accuracy_sample_count = (requested.accuracy_sample_count_override == 0) + ? performance_sample_count + : requested.accuracy_sample_count_override; + // Sample by concatentating several permutations of the dataset // sample_concatenate_permutation sample_concatenate_permutation = @@ -336,6 +341,8 @@ void LogRequestedTestSettings(const TestSettings &s) { s.performance_issue_same_index); MLPERF_LOG(detail, "requested_performance_sample_count_override", s.performance_sample_count_override); + MLPERF_LOG(detail, "requested_accuracy_sample_count_override", + s.accuracy_sample_count_override); MLPERF_LOG(detail, "requested_sample_concatenate_permutation", s.sample_concatenate_permutation); // Token latencies specific values @@ -407,6 +414,8 @@ void LogRequestedTestSettings(const TestSettings &s) { detail("performance_issue_same_index : ", s.performance_issue_same_index); detail("performance_sample_count_override : ", s.performance_sample_count_override); + detail("accuracy_sample_count_override : ", + s.accuracy_sample_count_override); detail(""); #endif }); @@ -450,6 +459,8 @@ void TestSettingsInternal::LogEffectiveSettings() const { s.performance_issue_same_index); MLPERF_LOG(detail, "effective_performance_sample_count", s.performance_sample_count); + MLPERF_LOG(detail, "effective_accuracy_sample_count", + s.accuracy_sample_count); MLPERF_LOG(detail, "effective_sample_concatenate_permutation", s.sample_concatenate_permutation); #else @@ -481,6 +492,7 @@ void TestSettingsInternal::LogEffectiveSettings() const { detail("performance_issue_same : ", s.performance_issue_same); detail("performance_issue_same_index : ", s.performance_issue_same_index); detail("performance_sample_count : ", s.performance_sample_count); + detail("accuracy_sample_count : ", s.accuracy_sample_count); #endif }); } @@ -515,6 +527,7 @@ void TestSettingsInternal::LogSummary(AsyncSummary &summary) const { summary("performance_issue_same : ", performance_issue_same); summary("performance_issue_same_index : ", performance_issue_same_index); summary("performance_sample_count : ", performance_sample_count); + summary("accuracy_sample_count : ", accuracy_sample_count); if (sample_concatenate_permutation) { summary( "WARNING: sample_concatenate_permutation was set to true. \n" @@ -779,6 +792,8 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, lookupkv(model, scenario, "max_query_count", &max_query_count, nullptr); lookupkv(model, scenario, "performance_sample_count_override", &performance_sample_count_override, nullptr); + lookupkv(model, scenario, "accuracy_sample_count_override", + &accuracy_sample_count_override, nullptr); lookupkv(model, "SingleStream", "target_latency", nullptr, &single_stream_expected_latency_ns, 1000 * 1000); lookupkv(model, "MultiStream", "target_latency", nullptr, diff --git a/loadgen/test_settings_internal.h b/loadgen/test_settings_internal.h index ab2773bd18..e93432bd0a 100644 --- a/loadgen/test_settings_internal.h +++ b/loadgen/test_settings_internal.h @@ -80,6 +80,7 @@ struct TestSettingsInternal { bool performance_issue_same; uint64_t performance_issue_same_index; uint64_t performance_sample_count; + uint64_t accuracy_sample_count; bool sample_concatenate_permutation; bool use_token_latencies = false; From f424dee60f9502bd9a84dfd36dc202fe2d80a224 Mon Sep 17 00:00:00 2001 From: Shobhit Verma Date: Wed, 17 Dec 2025 13:25:05 +0000 Subject: [PATCH 2/4] cap count to QSL->TotalSampleCount() --- loadgen/loadgen.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/loadgen/loadgen.cc b/loadgen/loadgen.cc index 28662a32c1..33f137051c 100644 --- a/loadgen/loadgen.cc +++ b/loadgen/loadgen.cc @@ -635,9 +635,13 @@ std::vector GenerateLoadableSets( std::vector result; std::mt19937 qsl_rng(settings.qsl_rng_seed); + // Cap sample count to QSL's actual total to avoid out-of-range indices. + const size_t qsl_total_count = qsl->TotalSampleCount(); + const size_t effective_sample_count = std::min(total_sample_count, qsl_total_count); + // Generate indices for the specified sample count. - std::vector samples(total_sample_count); - for (size_t i = 0; i < total_sample_count; i++) { + std::vector samples(effective_sample_count); + for (size_t i = 0; i < effective_sample_count; i++) { samples[i] = static_cast(i); } From 6368a90a6ca277de74ef0922f1edda415d34439d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 17 Dec 2025 13:25:46 +0000 Subject: [PATCH 3/4] [Automated Commit] Format Codebase --- loadgen/loadgen.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loadgen/loadgen.cc b/loadgen/loadgen.cc index 33f137051c..06f51ee66c 100644 --- a/loadgen/loadgen.cc +++ b/loadgen/loadgen.cc @@ -623,7 +623,8 @@ void LoadSamplesToRam(QuerySampleLibrary* qsl, /// RAM at the same time. /// \param qsl The query sample library. /// \param settings The test settings. -/// \param total_sample_count The total number of samples to generate indices for. +/// \param total_sample_count The total number of samples to generate indices +/// for. /// In accuracy mode, this should be accuracy_sample_count. /// In performance mode, this should be performance_sample_count. std::vector GenerateLoadableSets( @@ -637,7 +638,8 @@ std::vector GenerateLoadableSets( // Cap sample count to QSL's actual total to avoid out-of-range indices. const size_t qsl_total_count = qsl->TotalSampleCount(); - const size_t effective_sample_count = std::min(total_sample_count, qsl_total_count); + const size_t effective_sample_count = + std::min(total_sample_count, qsl_total_count); // Generate indices for the specified sample count. std::vector samples(effective_sample_count); From 3f2f71924c1cbe88f74b5e93e3ab90a0e9a0dd0f Mon Sep 17 00:00:00 2001 From: Shobhit Verma Date: Wed, 17 Dec 2025 13:31:37 +0000 Subject: [PATCH 4/4] empty commit to re-trigger test