-
Notifications
You must be signed in to change notification settings - Fork 61
Profile callprofiler with different testcases #829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure whether to put cpp files in |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| # Copyright (c) 2026, modmesh contributors | ||
| # BSD-style license; see COPYING | ||
|
|
||
| if(CMAKE_SYSTEM_NAME STREQUAL "Linux") | ||
| find_program(GPROF_EXECUTABLE gprof) | ||
| endif() | ||
|
|
||
| if(GPROF_EXECUTABLE) | ||
| add_subdirectory(cprof) | ||
| endif() | ||
|
|
||
| # vim: set ff=unix fenc=utf8 nobomb et sw=4 ts=4 sts=4: |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| # Copyright (c) 2026, modmesh contributors | ||
| # BSD-style license; see COPYING | ||
|
|
||
| set(CPROF_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") | ||
| set(CPROF_GENERATOR | ||
| "${CMAKE_CURRENT_SOURCE_DIR}/generate_workload.py") | ||
| set(CPROF_SHARD_COUNT 32) | ||
| set(CPROF_GENERATED_SOURCES | ||
| "${CPROF_GENERATED_DIR}/callprofiler_workload.cpp" | ||
| "${CPROF_GENERATED_DIR}/callprofiler_workload_functions.hpp") | ||
|
|
||
| math(EXPR CPROF_LAST_SHARD "${CPROF_SHARD_COUNT} - 1") | ||
| foreach(index RANGE 0 ${CPROF_LAST_SHARD}) | ||
| list(APPEND CPROF_GENERATED_SOURCES | ||
| "${CPROF_GENERATED_DIR}/callprofiler_workload_${index}.cpp") | ||
| endforeach() | ||
|
|
||
| add_custom_command( | ||
| OUTPUT ${CPROF_GENERATED_SOURCES} | ||
| COMMAND "${PYTHON_EXECUTABLE}" "${CPROF_GENERATOR}" | ||
| --output-dir "${CPROF_GENERATED_DIR}" | ||
| --shards "${CPROF_SHARD_COUNT}" | ||
| DEPENDS "${CPROF_GENERATOR}" | ||
| VERBATIM | ||
| ) | ||
|
Comment on lines
+18
to
+25
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generate benchmarks. |
||
|
|
||
| add_executable( | ||
| callprofiler_gprof | ||
| callprofiler_gprof.cpp | ||
| callprofiler_workload.hpp | ||
| ${CPROF_GENERATED_SOURCES} | ||
| ${PROJECT_SOURCE_DIR}/cpp/modmesh/toggle/RadixTree.cpp | ||
| ) | ||
|
|
||
| target_include_directories( | ||
| callprofiler_gprof PRIVATE | ||
| "${CMAKE_CURRENT_SOURCE_DIR}" | ||
| "${CPROF_GENERATED_DIR}" | ||
| ) | ||
|
|
||
| target_compile_options( | ||
| callprofiler_gprof PRIVATE | ||
| ${COMMON_COMPILER_OPTIONS} | ||
| -pg | ||
| -fno-omit-frame-pointer | ||
| ) | ||
|
|
||
| target_link_options(callprofiler_gprof PRIVATE -pg) | ||
| if(CMAKE_SYSTEM_NAME STREQUAL "Linux") | ||
| target_link_options(callprofiler_gprof PRIVATE -no-pie) | ||
| endif() | ||
|
|
||
| # vim: set ff=unix fenc=utf8 nobomb et sw=4 ts=4 sts=4: | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| /* | ||
| * Copyright (c) 2026, modmesh contributors | ||
| * BSD-style license; see COPYING | ||
| */ | ||
|
|
||
| #define MODMESH_PROFILE 1 | ||
| #include <callprofiler_workload.hpp> | ||
|
|
||
#include <array>
#include <charconv>
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <utility>
|
|
||
| #if defined(__linux__) | ||
| #include <sys/resource.h> | ||
| #endif | ||
|
|
||
| namespace profiling | ||
| { | ||
|
|
||
| namespace workload = modmesh::profiling; | ||
|
|
||
| using clock_type = std::chrono::steady_clock; | ||
| using profiler_type = modmesh::CallProfiler; | ||
| using runner_type = void (*)(std::size_t); | ||
|
|
||
| struct case_definition | ||
| { | ||
| std::string_view m_label; | ||
| runner_type m_runner; | ||
| }; | ||
|
|
||
| std::array<case_definition, 4> const case_definitions{{ | ||
| {"wide_siblings", &workload::run_wide_siblings}, | ||
| {"deep_chain", &workload::run_deep_chain}, | ||
| {"balanced_tree", &workload::run_balanced_tree}, | ||
| {"hot_name_reuse", &workload::run_hot_name_reuse}, | ||
| }}; | ||
|
Comment on lines
+36
to
+41
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are four kinds of benchmark workloads; they are generated by a Python script. |
||
|
|
||
/**
 * Best-effort attempt to raise the soft stack limit of this process to its
 * hard limit, so that deeply nested workloads have room to recurse.
 *
 * Does nothing on platforms other than Linux.  Failures of getrlimit() or
 * setrlimit() are deliberately ignored: the benchmark simply continues with
 * whatever stack limit is already in effect.
 */
void configure_large_stack()
{
#if defined(__linux__)
    rlimit stack_limit{};
    if (getrlimit(RLIMIT_STACK, &stack_limit) != 0)
    {
        return;
    }

    bool const hard_is_unlimited = (RLIM_INFINITY == stack_limit.rlim_max);
    bool const soft_below_hard = (stack_limit.rlim_cur < stack_limit.rlim_max);
    if (!hard_is_unlimited && !soft_below_hard)
    {
        // The soft limit already equals the hard limit; nothing to raise.
        return;
    }

    stack_limit.rlim_cur = stack_limit.rlim_max;
    // The result is intentionally discarded (best effort).
    static_cast<void>(setrlimit(RLIMIT_STACK, &stack_limit));
#endif
}
|
Comment on lines
+43
to
+56
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Raise the stack size limit up front, because the call depth may reach 50000. |
||
|
|
||
| template <typename Runner> | ||
| void run_case(std::string_view label, std::size_t operation_count, std::size_t repeat_count, Runner && runner) | ||
| { | ||
| profiler_type & profiler = profiler_type::instance(); | ||
| std::chrono::duration<double> elapsed{0.0}; | ||
|
|
||
| for (std::size_t repeat = 0; repeat < repeat_count; ++repeat) | ||
| { | ||
| profiler.reset(); | ||
|
|
||
| auto const start_time = clock_type::now(); | ||
| std::forward<Runner>(runner)(); | ||
| auto const stop_time = clock_type::now(); | ||
|
|
||
| elapsed += stop_time - start_time; | ||
| } | ||
|
|
||
| std::cout << "RESULT workload=" << label | ||
| << " operations=" << operation_count | ||
| << " repeats=" << repeat_count | ||
| << " workload_seconds=" << elapsed.count() | ||
| << '\n'; | ||
|
Comment on lines
+75
to
+79
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file will print the wall time of benchmark, because we cannot obtain wall time from gprof |
||
|
|
||
| profiler.reset(); | ||
| } | ||
|
|
||
/**
 * Parse a command-line argument as a non-negative decimal integer.
 *
 * The whole argument must consist of decimal digits: empty strings, signs,
 * leading whitespace, trailing characters and values that do not fit in
 * std::size_t are all rejected instead of being silently turned into 0 or a
 * wrapped-around value.
 *
 * @param value  NUL-terminated text to parse.
 * @return The parsed value.
 * @throws std::invalid_argument if @p value is null or is not a valid
 *         in-range decimal integer.
 */
std::size_t parse_size(char const * value)
{
    if (value == nullptr)
    {
        throw std::invalid_argument("parse_size: missing value");
    }

    char const * const last = value + std::strlen(value);
    std::size_t parsed = 0;
    std::from_chars_result const outcome = std::from_chars(value, last, parsed);

    // from_chars stops at the first character it cannot use, so also require
    // that it consumed the argument up to its terminator.
    if (outcome.ec != std::errc{} || outcome.ptr != last)
    {
        throw std::invalid_argument(
            std::string{"parse_size: not a valid non-negative integer: '"} + value + "'");
    }

    return parsed;
}
|
|
||
| case_definition const * find_case(std::string_view label) | ||
| { | ||
| for (case_definition const & definition : case_definitions) | ||
| { | ||
| if (definition.m_label == label) | ||
| { | ||
| return &definition; | ||
| } | ||
| } | ||
| return nullptr; | ||
| } | ||
|
|
||
| bool run_named_case(std::string_view label, std::size_t size, std::size_t repeat_count) | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Run different types of functions with different hyperparameter. |
||
| { | ||
| case_definition const * definition = find_case(label); | ||
| if (definition == nullptr) | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| run_case(definition->m_label, size, repeat_count, [definition, size]() | ||
| { definition->m_runner(size); }); | ||
| return true; | ||
| } | ||
|
|
||
| } /* namespace profiling */ | ||
|
|
||
| int main(int argc, char ** argv) | ||
| { | ||
| if (argc == 4) | ||
| { | ||
| profiling::configure_large_stack(); | ||
| bool const completed = profiling::run_named_case( | ||
| argv[1], | ||
| profiling::parse_size(argv[2]), | ||
| profiling::parse_size(argv[3])); | ||
| return completed ? 0 : 2; | ||
| } | ||
|
|
||
| return 2; | ||
| } | ||
|
|
||
| // vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4: | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| #pragma once | ||
|
|
||
| /* | ||
| * Copyright (c) 2026, modmesh contributors | ||
| * BSD-style license; see COPYING | ||
| */ | ||
|
|
||
| #ifndef MODMESH_PROFILE | ||
| #define MODMESH_PROFILE 1 | ||
| #endif | ||
|
|
||
| #include <modmesh/toggle/RadixTree.hpp> | ||
|
|
||
| #include <cstddef> | ||
|
|
||
| #if defined(__clang__) || defined(__GNUC__) | ||
| #define MODMESH_CPROF_NOINLINE __attribute__((noinline)) | ||
| #define MODMESH_CPROF_NOINST __attribute__((no_instrument_function)) | ||
| #else | ||
| #define MODMESH_CPROF_NOINLINE | ||
| #define MODMESH_CPROF_NOINST | ||
| #endif | ||
|
|
||
| namespace modmesh::profiling | ||
| { | ||
|
|
||
| void run_wide_siblings(std::size_t size); | ||
| void run_deep_chain(std::size_t size); | ||
| void run_balanced_tree(std::size_t size); | ||
| void run_hot_name_reuse(std::size_t size); | ||
|
|
||
| namespace detail | ||
| { | ||
|
|
||
| enum class WorkloadShape | ||
| { | ||
| flat, | ||
| list, | ||
| tree, | ||
| }; | ||
|
|
||
| using profile_function_type = void (*)(std::size_t, std::size_t); | ||
|
|
||
| extern WorkloadShape active_shape; | ||
| extern std::size_t active_size; | ||
|
|
||
| void call_profile_function(std::size_t index, std::size_t begin, std::size_t end); | ||
|
|
||
| } /* namespace detail */ | ||
|
|
||
| } /* namespace modmesh::profiling */ | ||
|
|
||
| // vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Profile the profiler only on Linux.