Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/profiling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ jobs:
run: |
make pyprof
- name: make cprof
if: runner.os == 'Linux'
run: |
make cprof
Comment on lines +77 to +81
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Profile the profiler only on Linux.

send_email_on_failure:
needs: [profile]
if: ${{ always() && (needs.*.result == 'failure') && github.ref_name == 'master' && github.event.repository.fork == false }}
Expand Down
2 changes: 2 additions & 0 deletions CMakeLists.txt
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure whether to put cpp files in /profiling.

Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ else()
endif()
endif()

add_subdirectory(profiling)

add_subdirectory(cpp/binary/pymod_modmesh)
if(BUILD_QT)
add_subdirectory(cpp/binary/pilot)
Expand Down
24 changes: 22 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ ifneq ($(VERBOSE),)
else
PYTEST_OPTS ?=
endif
GPROF ?= gprof

.PHONY: default
default: buildext
Expand Down Expand Up @@ -143,6 +144,25 @@ pyprof: buildext $(PROFFILES)
$(WHICH_PYTHON) $${fn} > $${outfn} || exit 1; \
done

# cprof: build the callprofiler_gprof target and run profiling/cprof/run.py on
# the resulting executable.
#
# The recipe fails early unless the host reports "Linux" from `uname -s` and
# the $(GPROF) command can be found in PATH.  Any previous
# profiling/results/profile_profiler.output is removed before the run, and the
# script is given the executable path, the gprof command, the result directory
# and the working directory inside $(BUILD_PATH).
.PHONY: cprof
cprof: cmake
@test "$$(uname -s)" = "Linux" || { \
echo "Error: make cprof is only supported on Linux."; \
exit 1; \
}
@command -v $(GPROF) >/dev/null 2>&1 || { \
echo "Error: '$(GPROF)' not found in PATH."; \
exit 1; \
}
cmake --build $(BUILD_PATH) --target callprofiler_gprof VERBOSE=$(VERBOSE) $(MAKE_PARALLEL)
mkdir -p profiling/results
rm -f profiling/results/profile_profiler.output
env $(RUNENV) $(WHICH_PYTHON) $(MODMESH_ROOT)/profiling/cprof/run.py \
--executable $(MODMESH_ROOT)/$(BUILD_PATH)/profiling/cprof/callprofiler_gprof \
--gprof $(GPROF) \
--result-dir $(MODMESH_ROOT)/profiling/results \
--working-dir $(MODMESH_ROOT)/$(BUILD_PATH)/profiling/cprof

.PHONY: pilot
pilot: cmake
cmake --build $(BUILD_PATH) --target $@ VERBOSE=$(VERBOSE) $(MAKE_PARALLEL)
Expand Down Expand Up @@ -198,7 +218,7 @@ AUTOPEP8_OPTS ?= --recursive --max-line-length=79 \
--ignore=E121,E123,E126,E201,E202,E203,E226,E241,E301,E303,E501,W503,W504 \
--exclude=thirdparty,tmp,_deps

CFFILES = $(shell find cpp gtests -type f -name '*.[ch]pp' | sort)
CFFILES = $(shell find cpp gtests profiling -type f -name '*.[ch]pp' | sort)
ifeq ($(FORCE_CLANG_FORMAT),inplace)
CFCMD ?= $(CLANG_FORMAT) -i
else
Expand Down Expand Up @@ -226,7 +246,7 @@ cformat: $(CFFILES)

.PHONY: cinclude
cinclude: $(CFFILES)
@if grep -rnE '^[[:space:]]*#[[:space:]]*include[[:space:]]*"' cpp/ gtests/ 2>/dev/null; then \
@if grep -nE '^[[:space:]]*#[[:space:]]*include[[:space:]]*"' $(CFFILES) 2>/dev/null; then \
echo "Error: use angle brackets for #include, not quotes (see lines above)."; \
exit 1; \
fi
Expand Down
12 changes: 12 additions & 0 deletions profiling/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) 2026, modmesh contributors
# BSD-style license; see COPYING

# gprof is only searched for on Linux, so GPROF_EXECUTABLE is not set by this
# file on other systems.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_program(GPROF_EXECUTABLE gprof)
endif()

# The cprof subdirectory (and the targets it defines) is added only when a
# gprof executable is available.
if(GPROF_EXECUTABLE)
add_subdirectory(cprof)
endif()

# vim: set ff=unix fenc=utf8 nobomb et sw=4 ts=4 sts=4:
53 changes: 53 additions & 0 deletions profiling/cprof/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) 2026, modmesh contributors
# BSD-style license; see COPYING

# Generated workload sources are placed under the build tree, not the source
# tree.
set(CPROF_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(CPROF_GENERATOR
"${CMAKE_CURRENT_SOURCE_DIR}/generate_workload.py")
# Number of callprofiler_workload_<index>.cpp shard files expected from the
# generator; the same value is passed to it through --shards.
set(CPROF_SHARD_COUNT 32)
set(CPROF_GENERATED_SOURCES
"${CPROF_GENERATED_DIR}/callprofiler_workload.cpp"
"${CPROF_GENERATED_DIR}/callprofiler_workload_functions.hpp")

# foreach(RANGE start stop) includes the stop value, so iterate up to
# CPROF_SHARD_COUNT - 1 to list exactly CPROF_SHARD_COUNT shard files.
math(EXPR CPROF_LAST_SHARD "${CPROF_SHARD_COUNT} - 1")
foreach(index RANGE 0 ${CPROF_LAST_SHARD})
list(APPEND CPROF_GENERATED_SOURCES
"${CPROF_GENERATED_DIR}/callprofiler_workload_${index}.cpp")
endforeach()

# Run the generator script to produce every file listed in
# CPROF_GENERATED_SOURCES.  The command depends only on the generator script,
# so the sources are regenerated when that script changes.
# NOTE(review): PYTHON_EXECUTABLE is not set in this file; it is presumably
# provided by the parent project -- confirm.
add_custom_command(
OUTPUT ${CPROF_GENERATED_SOURCES}
COMMAND "${PYTHON_EXECUTABLE}" "${CPROF_GENERATOR}"
--output-dir "${CPROF_GENERATED_DIR}"
--shards "${CPROF_SHARD_COUNT}"
DEPENDS "${CPROF_GENERATOR}"
VERBATIM
)
Comment on lines +18 to +25
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generate benchmarks.


# Executable combining the hand-written driver, the generated workload sources
# and RadixTree.cpp compiled directly from the modmesh source tree.
add_executable(
callprofiler_gprof
callprofiler_gprof.cpp
callprofiler_workload.hpp
${CPROF_GENERATED_SOURCES}
${PROJECT_SOURCE_DIR}/cpp/modmesh/toggle/RadixTree.cpp
)

# Both the source directory (callprofiler_workload.hpp) and the generated
# directory (callprofiler_workload_functions.hpp) are on the include path.
target_include_directories(
callprofiler_gprof PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${CPROF_GENERATED_DIR}"
)

# -pg enables the profiling instrumentation that gprof consumes; it is needed
# both when compiling and when linking.
target_compile_options(
callprofiler_gprof PRIVATE
${COMMON_COMPILER_OPTIONS}
-pg
-fno-omit-frame-pointer
)

target_link_options(callprofiler_gprof PRIVATE -pg)
# NOTE(review): -no-pie is presumably required because gprof does not handle
# position-independent executables well -- confirm.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_options(callprofiler_gprof PRIVATE -no-pie)
endif()

# vim: set ff=unix fenc=utf8 nobomb et sw=4 ts=4 sts=4:
131 changes: 131 additions & 0 deletions profiling/cprof/callprofiler_gprof.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright (c) 2026, modmesh contributors
* BSD-style license; see COPYING
*/

#define MODMESH_PROFILE 1
#include <callprofiler_workload.hpp>

#include <array>
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string_view>
#include <utility>

#if defined(__linux__)
#include <sys/resource.h>
#endif

namespace profiling
{

namespace workload = modmesh::profiling;

using clock_type = std::chrono::steady_clock;
using profiler_type = modmesh::CallProfiler;
using runner_type = void (*)(std::size_t);

// Pairs a workload label with the function that runs that workload.
struct case_definition
{
// Label compared against the requested workload name in find_case().
std::string_view m_label;
// Function invoked with the requested size in run_named_case().
runner_type m_runner;
};

// Table of the workloads this executable can run; find_case() searches it by
// label.  The runner functions are declared in callprofiler_workload.hpp
// (namespace modmesh::profiling, aliased here as `workload`).
std::array<case_definition, 4> const case_definitions{{
{"wide_siblings", &workload::run_wide_siblings},
{"deep_chain", &workload::run_deep_chain},
{"balanced_tree", &workload::run_balanced_tree},
{"hot_name_reuse", &workload::run_hot_name_reuse},
}};
Comment on lines +36 to +41
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are four kinds of benchmarks. They are generated by Python scripts.


/**
 * Raise the soft stack-size limit of the current process to its hard limit.
 *
 * Only has an effect on Linux; elsewhere the function body is empty.  Failures
 * of getrlimit() or setrlimit() are ignored, in which case the limit simply
 * stays as it was.
 */
void configure_large_stack()
{
#if defined(__linux__)
    rlimit stack_limit{};
    if (getrlimit(RLIMIT_STACK, &stack_limit) != 0)
    {
        // The current limits cannot be queried; leave them untouched.
        return;
    }

    bool const hard_is_unlimited = (RLIM_INFINITY == stack_limit.rlim_max);
    bool const soft_below_hard = (stack_limit.rlim_cur < stack_limit.rlim_max);
    if (!hard_is_unlimited && !soft_below_hard)
    {
        // The soft limit already matches a finite hard limit.
        return;
    }

    stack_limit.rlim_cur = stack_limit.rlim_max;
    // Best effort: the result of setrlimit() is deliberately discarded.
    static_cast<void>(setrlimit(RLIMIT_STACK, &stack_limit));
#endif
}
Comment on lines +43 to +56
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raise the stack size limit first, because the call depth may reach 50000.


// Run one workload repeat_count times and print a single RESULT line.
//
// label            Workload name, printed as "workload=".
// operation_count  Only printed (as "operations="); it does not affect how the
//                  runner is invoked here.
// repeat_count     Number of times the runner is invoked.
// runner           Callable taking no arguments.
//
// The profiler singleton is reset before every repetition and once more after
// the result line has been written.  The reported workload_seconds is the sum
// of the clock_type (std::chrono::steady_clock) intervals around each runner
// invocation, so the reset calls are not included in it.
template <typename Runner>
void run_case(std::string_view label, std::size_t operation_count, std::size_t repeat_count, Runner && runner)
{
profiler_type & profiler = profiler_type::instance();
std::chrono::duration<double> elapsed{0.0};

for (std::size_t repeat = 0; repeat < repeat_count; ++repeat)
{
// Reset outside the timed region so only the runner itself is measured.
profiler.reset();

auto const start_time = clock_type::now();
// NOTE(review): the same forwarding reference is forwarded on every
// iteration; a callable with an rvalue-qualified operator() could be left
// in a moved-from state after the first call.  Calling runner() directly
// would avoid that -- confirm and consider changing.
std::forward<Runner>(runner)();
auto const stop_time = clock_type::now();

elapsed += stop_time - start_time;
}

// One line per case on stdout, formatted as space-separated key=value pairs.
std::cout << "RESULT workload=" << label
<< " operations=" << operation_count
<< " repeats=" << repeat_count
<< " workload_seconds=" << elapsed.count()
<< '\n';
Comment on lines +75 to +79
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file will print the wall time of benchmark, because we cannot obtain wall time from gprof


profiler.reset();
}

/**
 * Parse a non-negative decimal integer from a command-line argument.
 *
 * @param value  NUL-terminated string expected to consist of decimal digits
 *               only.
 * @return The parsed value, or 0 when @p value is null, empty, or contains
 *         anything other than decimal digits (sign, whitespace, trailing
 *         text).
 *
 * std::strtoull() on its own skips leading whitespace and accepts a sign; a
 * leading '-' makes it return the negated value converted to unsigned, so
 * "-1" would become the largest representable number and turn into an
 * enormous size or repeat count.  Requiring the string to start with a digit
 * and to be consumed completely rules those inputs out.  Values too large for
 * unsigned long long still saturate as std::strtoull() defines.
 */
std::size_t parse_size(char const * value)
{
    if (value == nullptr || *value < '0' || *value > '9')
    {
        return 0;
    }

    char * end = nullptr;
    unsigned long long const parsed = std::strtoull(value, &end, 10);
    if (*end != '\0')
    {
        // Trailing characters after the digits: treat the argument as invalid.
        return 0;
    }
    return static_cast<std::size_t>(parsed);
}

/**
 * Look up a workload in case_definitions by its label.
 *
 * @param label  Workload name to search for.
 * @return Pointer to the first table entry whose m_label equals @p label, or
 *         nullptr when no entry matches.
 */
case_definition const * find_case(std::string_view label)
{
    case_definition const * match = nullptr;
    for (std::size_t index = 0; match == nullptr && index < case_definitions.size(); ++index)
    {
        case_definition const & candidate = case_definitions[index];
        if (label == candidate.m_label)
        {
            match = &candidate;
        }
    }
    return match;
}

// Run the workload registered under `label`.
//
// label         Name looked up with find_case().
// size          Value passed to the workload's runner function; also reported
//               as the operation count by run_case().
// repeat_count  Number of repetitions forwarded to run_case().
//
// Returns false without running anything when no workload has that label, and
// true after run_case() has completed.
bool run_named_case(std::string_view label, std::size_t size, std::size_t repeat_count)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run different types of functions with different hyperparameter.

{
case_definition const * definition = find_case(label);
if (definition == nullptr)
{
return false;
}

// The lambda adapts the runner, which takes a size, to the zero-argument
// callable that run_case() invokes.
run_case(definition->m_label, size, repeat_count, [definition, size]()
{ definition->m_runner(size); });
return true;
}

} /* namespace profiling */

/**
 * Entry point: expects exactly three arguments, `<label> <size> <repeats>`.
 *
 * Exits with status 0 when the named workload ran, and with status 2 when the
 * argument count is wrong or the label does not name a known workload.
 */
int main(int argc, char ** argv)
{
    constexpr int status_failure = 2;
    constexpr int expected_argc = 4;

    if (argc != expected_argc)
    {
        return status_failure;
    }

    // Raise the stack limit before any workload starts executing.
    profiling::configure_large_stack();

    std::size_t const size = profiling::parse_size(argv[2]);
    std::size_t const repeat_count = profiling::parse_size(argv[3]);
    if (!profiling::run_named_case(argv[1], size, repeat_count))
    {
        return status_failure;
    }
    return 0;
}

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
53 changes: 53 additions & 0 deletions profiling/cprof/callprofiler_workload.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#pragma once

/*
* Copyright (c) 2026, modmesh contributors
* BSD-style license; see COPYING
*/

#ifndef MODMESH_PROFILE
#define MODMESH_PROFILE 1
#endif

#include <modmesh/toggle/RadixTree.hpp>

#include <cstddef>

// Function attributes for the profiling workload code.  On clang and GCC:
//   MODMESH_CPROF_NOINLINE  expands to __attribute__((noinline)), which keeps
//                           the function from being inlined into its callers.
//   MODMESH_CPROF_NOINST    expands to __attribute__((no_instrument_function)),
//                           which excludes the function from compiler-inserted
//                           profiling instrumentation.
// On other compilers both macros expand to nothing.
#if defined(__clang__) || defined(__GNUC__)
#define MODMESH_CPROF_NOINLINE __attribute__((noinline))
#define MODMESH_CPROF_NOINST __attribute__((no_instrument_function))
#else
#define MODMESH_CPROF_NOINLINE
#define MODMESH_CPROF_NOINST
#endif

// Declarations shared by the profiling workload sources.  Nothing is defined
// in this header; the definitions live in other translation units.
namespace modmesh::profiling
{

// Workload entry points, each taking a single size argument.
void run_wide_siblings(std::size_t size);
void run_deep_chain(std::size_t size);
void run_balanced_tree(std::size_t size);
void run_hot_name_reuse(std::size_t size);

namespace detail
{

// NOTE(review): presumably selects how the workload functions call each
// other; the meaning of each enumerator is not visible here -- confirm
// against the workload sources.
enum class WorkloadShape
{
flat,
list,
tree,
};

// Pointer to a function taking two std::size_t arguments and returning
// nothing.
using profile_function_type = void (*)(std::size_t, std::size_t);

// Shared state declared here and defined elsewhere.
extern WorkloadShape active_shape;
extern std::size_t active_size;

// NOTE(review): presumably dispatches to the workload function selected by
// `index`, passing on `begin` and `end` -- confirm against the definition.
void call_profile_function(std::size_t index, std::size_t begin, std::size_t end);

} /* namespace detail */

} /* namespace modmesh::profiling */

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
Loading
Loading