Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions loadgen/bindings/python_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
&TestSettings::performance_issue_same_index)
.def_readwrite("performance_sample_count_override",
&TestSettings::performance_sample_count_override)
.def_readwrite("accuracy_sample_count_override",
&TestSettings::accuracy_sample_count_override)
.def_readwrite("test05", &TestSettings::test05)
.def_readwrite("test05_qsl_rng_seed", &TestSettings::test05_qsl_rng_seed)
.def_readwrite("test05_sample_index_rng_seed",
Expand Down
35 changes: 26 additions & 9 deletions loadgen/loadgen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -621,18 +621,29 @@ void LoadSamplesToRam(QuerySampleLibrary* qsl,

/// \brief Generates random sets of samples in the QSL that we can load into
/// RAM at the same time.
/// \param qsl The query sample library.
/// \param settings The test settings.
/// \param total_sample_count The total number of samples to generate indices
/// for.
/// In accuracy mode, this should be accuracy_sample_count.
/// In performance mode, this should be performance_sample_count.
std::vector<LoadableSampleSet> GenerateLoadableSets(
QuerySampleLibrary* qsl, const TestSettingsInternal& settings) {
QuerySampleLibrary* qsl, const TestSettingsInternal& settings,
size_t total_sample_count) {
auto tracer = MakeScopedTracer(
[](AsyncTrace& trace) { trace("GenerateLoadableSets"); });

std::vector<LoadableSampleSet> result;
std::mt19937 qsl_rng(settings.qsl_rng_seed);

// Generate indices for all available samples in the QSL.
// Cap sample count to QSL's actual total to avoid out-of-range indices.
const size_t qsl_total_count = qsl->TotalSampleCount();
std::vector<QuerySampleIndex> samples(qsl_total_count);
for (size_t i = 0; i < qsl_total_count; i++) {
const size_t effective_sample_count =
std::min(total_sample_count, qsl_total_count);

// Generate indices for the specified sample count.
std::vector<QuerySampleIndex> samples(effective_sample_count);
for (size_t i = 0; i < effective_sample_count; i++) {
samples[i] = static_cast<QuerySampleIndex>(i);
}

Expand Down Expand Up @@ -754,7 +765,8 @@ std::pair<PerformanceSummary, PerformanceSummary> FindBoundaries(
});

std::vector<loadgen::LoadableSampleSet> loadable_sets(
loadgen::GenerateLoadableSets(qsl, u_settings));
loadgen::GenerateLoadableSets(qsl, u_settings,
u_settings.performance_sample_count));
const LoadableSampleSet& performance_set = loadable_sets.front();
LoadSamplesToRam(qsl, performance_set.set);

Expand Down Expand Up @@ -841,7 +853,8 @@ void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,

// Use first loadable set as the performance set.
std::vector<loadgen::LoadableSampleSet> loadable_sets(
loadgen::GenerateLoadableSets(qsl, settings));
loadgen::GenerateLoadableSets(qsl, settings,
settings.performance_sample_count));
const LoadableSampleSet& performance_set = loadable_sets.front();
LoadSamplesToRam(qsl, performance_set.set);

Expand Down Expand Up @@ -974,7 +987,8 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
// 1. Check whether the lower bound provided by the user satisfies the
// performance constraints.
std::vector<loadgen::LoadableSampleSet> base_loadable_sets(
loadgen::GenerateLoadableSets(qsl, base_settings));
loadgen::GenerateLoadableSets(qsl, base_settings,
base_settings.performance_sample_count));
const LoadableSampleSet& base_performance_set = base_loadable_sets.front();
LoadSamplesToRam(qsl, base_performance_set.set);

Expand Down Expand Up @@ -1044,7 +1058,9 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,

// Reuse performance_set, u_perf_summary has the largest 'samples_per_query'.
std::vector<loadgen::LoadableSampleSet> loadable_sets(
loadgen::GenerateLoadableSets(qsl, u_perf_summary.settings));
loadgen::GenerateLoadableSets(
qsl, u_perf_summary.settings,
u_perf_summary.settings.performance_sample_count));
const LoadableSampleSet& performance_set = loadable_sets.front();
LoadSamplesToRam(qsl, performance_set.set);

Expand Down Expand Up @@ -1089,7 +1105,8 @@ void RunAccuracyMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
});

std::vector<loadgen::LoadableSampleSet> loadable_sets(
loadgen::GenerateLoadableSets(qsl, settings));
loadgen::GenerateLoadableSets(qsl, settings,
settings.accuracy_sample_count));

for (auto& loadable_set : loadable_sets) {
{
Expand Down
4 changes: 4 additions & 0 deletions loadgen/test_settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ struct TestSettings {
uint64_t performance_issue_same_index = 0;
/// \brief Overrides QSL->PerformanceSampleCount() when non-zero
uint64_t performance_sample_count_override = 0;
/// \brief Specifies the number of samples for accuracy evaluation.
/// When non-zero, accuracy mode generates sample indices [0, accuracy_sample_count_override).
/// When zero (default), uses performance_sample_count.
uint64_t accuracy_sample_count_override = 0;
/// \brief Measure token latencies
bool use_token_latencies = false;
/// Token latency parameters
Expand Down
15 changes: 15 additions & 0 deletions loadgen/test_settings_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ TestSettingsInternal::TestSettingsInternal(
? qsl_performance_sample_count
: requested.performance_sample_count_override;

// Accuracy Sample Count: Override -> performance_sample_count (default)
accuracy_sample_count = (requested.accuracy_sample_count_override == 0)
? performance_sample_count
: requested.accuracy_sample_count_override;

// Sample by concatenating several permutations of the dataset
// sample_concatenate_permutation
sample_concatenate_permutation =
Expand Down Expand Up @@ -336,6 +341,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
s.performance_issue_same_index);
MLPERF_LOG(detail, "requested_performance_sample_count_override",
s.performance_sample_count_override);
MLPERF_LOG(detail, "requested_accuracy_sample_count_override",
s.accuracy_sample_count_override);
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
s.sample_concatenate_permutation);
// Token latencies specific values
Expand Down Expand Up @@ -407,6 +414,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
detail("performance_issue_same_index : ", s.performance_issue_same_index);
detail("performance_sample_count_override : ",
s.performance_sample_count_override);
detail("accuracy_sample_count_override : ",
s.accuracy_sample_count_override);
detail("");
#endif
});
Expand Down Expand Up @@ -450,6 +459,8 @@ void TestSettingsInternal::LogEffectiveSettings() const {
s.performance_issue_same_index);
MLPERF_LOG(detail, "effective_performance_sample_count",
s.performance_sample_count);
MLPERF_LOG(detail, "effective_accuracy_sample_count",
s.accuracy_sample_count);
MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
s.sample_concatenate_permutation);
#else
Expand Down Expand Up @@ -481,6 +492,7 @@ void TestSettingsInternal::LogEffectiveSettings() const {
detail("performance_issue_same : ", s.performance_issue_same);
detail("performance_issue_same_index : ", s.performance_issue_same_index);
detail("performance_sample_count : ", s.performance_sample_count);
detail("accuracy_sample_count : ", s.accuracy_sample_count);
#endif
});
}
Expand Down Expand Up @@ -515,6 +527,7 @@ void TestSettingsInternal::LogSummary(AsyncSummary &summary) const {
summary("performance_issue_same : ", performance_issue_same);
summary("performance_issue_same_index : ", performance_issue_same_index);
summary("performance_sample_count : ", performance_sample_count);
summary("accuracy_sample_count : ", accuracy_sample_count);
if (sample_concatenate_permutation) {
summary(
"WARNING: sample_concatenate_permutation was set to true. \n"
Expand Down Expand Up @@ -779,6 +792,8 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
lookupkv(model, scenario, "max_query_count", &max_query_count, nullptr);
lookupkv(model, scenario, "performance_sample_count_override",
&performance_sample_count_override, nullptr);
lookupkv(model, scenario, "accuracy_sample_count_override",
&accuracy_sample_count_override, nullptr);
lookupkv(model, "SingleStream", "target_latency", nullptr,
&single_stream_expected_latency_ns, 1000 * 1000);
lookupkv(model, "MultiStream", "target_latency", nullptr,
Expand Down
1 change: 1 addition & 0 deletions loadgen/test_settings_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ struct TestSettingsInternal {
bool performance_issue_same;
uint64_t performance_issue_same_index;
uint64_t performance_sample_count;
uint64_t accuracy_sample_count;

bool sample_concatenate_permutation;
bool use_token_latencies = false;
Expand Down
Loading