Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ Manifest.toml
docs/Manifest.toml

# Project specific ignores below
# generated example artifacts
/examples/**/plots/
/examples/**/trajectories/

# benchmark output artifacts
/benchmark/results/

# external pkgs and configs
pardiso.lic
/.CondaPkg/
Expand Down
17 changes: 17 additions & 0 deletions benchmark/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[deps]
DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56"
ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[sources]
DirectTrajOpt = {path = ".."}
# TODO: drop rev pin once HarmoniqsBenchmarks.jl#1 (feat/alloc-profile) merges
HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "feat/alloc-profile"}
133 changes: 133 additions & 0 deletions benchmark/alloc_profile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# =============================================================================
# Ipopt + MadNLP allocation profile — bilinear toy problem
#
# Runs `solve!` once per solver under Profile.Allocs via benchmark_memory!
# from HarmoniqsBenchmarks.jl and saves the sampled trace to
# benchmark/results/allocs/ for hot-path triage. The Piccolissimo alloc-
# profile testitem covers the Altissimo side; this script is the sibling
# for the in-tree NLP solvers.
#
# Uses the same `bilinear_dynamics_and_trajectory` fixture the main test
# suite uses, so the profiled problem is deterministic and small (N=10,
# 4-state × 2-control) — we care about allocation *patterns*, not absolute
# counts on a production-size problem.
#
# Run:
# julia --project=benchmark benchmark/alloc_profile.jl
# =============================================================================

using Random
using NamedTrajectories
using SparseArrays
using LinearAlgebra
using DirectTrajOpt
using MathOptInterface
const MOI = MathOptInterface
using Ipopt
using MadNLP
using HarmoniqsBenchmarks

# Resolve the MadNLPSolverExt extension module so MadNLPOptions is accessible
# (matches the pattern used in Piccolissimo.jl/benchmark/benchmarks.jl).
# The most recently loaded module with that name is used. If none is loaded,
# fail with an actionable message instead of a bare BoundsError from `[...][1]`.
const MadNLPSolverExt = let
    idx = findlast(mod -> Symbol(mod) == :MadNLPSolverExt, Base.loaded_modules_order)
    idx === nothing && error(
        "MadNLPSolverExt is not loaded; make sure DirectTrajOpt and MadNLP are " *
        "both loaded before this point so the extension can be resolved.",
    )
    Base.loaded_modules_order[idx]
end

# Pull in the bilinear fixture without duplicating it.
include(joinpath(@__DIR__, "..", "test", "test_utils.jl"))

# Seed the global RNG once, at load time, so every run of this script starts
# from the same random state.
Random.seed!(42)

# Directory the saved allocation profiles are written to. It is created
# eagerly at load time so later saves do not have to check for it.
const RESULTS_DIR = joinpath(@__DIR__, "results", "allocs")
mkpath(RESULTS_DIR)

# ----------------------------------------------------------------------------
# Problem builder — wraps the shared fixture with a QuadraticRegularizer-style
# objective so both Ipopt and MadNLP see the same NLP.
# ----------------------------------------------------------------------------
"""
    build_problem(; N = 10, seed = 42)

Build the bilinear toy problem from the shared `bilinear_dynamics_and_trajectory`
fixture and return it together with its trajectory.

# Keywords
- `N`: forwarded to `bilinear_dynamics_and_trajectory`.
- `seed`: passed to `Random.seed!` before the fixture is built; pass `nothing`
  to leave the global RNG untouched.

# Returns
A tuple `(prob, traj)` where `prob` is the `DirectTrajOptProblem` and `traj` is
the trajectory returned by the fixture.
"""
function build_problem(; N = 10, seed = 42)
    # Re-seed on every build. This script builds several problems (a warmup and
    # a profiled one per solver); with only a single load-time seed each build
    # would start from a different global RNG state.
    # NOTE(review): assumes the fixture draws from the global RNG — confirm
    # against test/test_utils.jl. Re-seeding is harmless if it does not.
    isnothing(seed) || Random.seed!(seed)

    G, traj = bilinear_dynamics_and_trajectory(; N = N)

    integrators = [
        BilinearIntegrator(G, :x, :u, traj),
        DerivativeIntegrator(:u, :du, traj),
        DerivativeIntegrator(:du, :ddu, traj),
    ]

    # Terminal squared distance to the goal state plus a regularizer on :u.
    J = TerminalObjective(x -> norm(x - traj.goal.x)^2, :x, traj)
    J += QuadraticRegularizer(:u, traj, 1.0)

    prob = DirectTrajOptProblem(traj, J, integrators)
    return prob, traj
end

# ----------------------------------------------------------------------------
# Profile one solver. Warmup runs on a separately built throwaway problem
# (a second `build_problem` call, not a deepcopy) so JIT/compile allocations
# stay out of the recorded trace.
# ----------------------------------------------------------------------------
"""
    profile_solver(; solver_name, options_ctor, N = 10, sample_rate = 1.0)

Solve one throwaway problem as a warmup, then solve a second, separately built
problem inside `benchmark_memory!` and save the resulting profile under
`RESULTS_DIR`.

# Keywords
- `solver_name`: label used in log lines, the `solver` metadata field and
  (lower-cased) the benchmark name.
- `options_ctor`: zero-argument callable returning a fresh options object; it is
  called once for the warmup solve and once for the profiled solve.
- `N`: forwarded to `build_problem`.
- `sample_rate`: forwarded to `benchmark_memory!`.

# Returns
A tuple `(profile, path)`: the value returned by `benchmark_memory!` and the
value returned by `save_alloc_profile`.
"""
function profile_solver(; solver_name, options_ctor, N = 10, sample_rate = 1.0)
    warmup_prob, traj = build_problem(; N = N)
    target_prob, _ = build_problem(; N = N)

    state_dim = traj.dims[:x]
    # Every control component except the timestep counts toward control_dim.
    ctrl_dim = sum(
        (traj.dims[name] for name in traj.control_names if name != traj.timestep);
        init = 0,
    )

    println("\n[$(solver_name)] JIT warmup on throwaway problem copy...")
    DirectTrajOpt.solve!(warmup_prob; options = options_ctor())

    println("[$(solver_name)] Profiling allocations (sample_rate=$(sample_rate))...")
    # The workload handed to benchmark_memory!; warmup is disabled there because
    # it has already been done above on a separate problem.
    run_solve = () -> DirectTrajOpt.solve!(target_prob; options = options_ctor())
    profile = benchmark_memory!(
        run_solve;
        package = "DirectTrajOpt",
        solver = solver_name,
        benchmark_name = "bilinear_N$(N)_$(lowercase(solver_name))",
        N = traj.N,
        state_dim = state_dim,
        control_dim = ctrl_dim,
        sample_rate = sample_rate,
        warmup = false,
        runner = "local",
    )

    total_mb = profile.total_bytes / 2^20
    println("[$(solver_name)] captured $(profile.total_count) samples, $(round(total_mb; digits=2)) MB total")

    saved_path = save_alloc_profile(RESULTS_DIR, profile.benchmark_name, profile)
    println("[$(solver_name)] saved to $(saved_path)")
    return profile, saved_path
end

# ----------------------------------------------------------------------------
# Entry points
#
# sample_rate default is 0.01 because Ipopt/MadNLP generate orders of magnitude
# more fine-grained allocations than the solve's wall-time budget accommodates
# at sample_rate=1.0 (an N=10 bilinear toy can hang for 15+ minutes at 1.0).
# 0.01 still gives statistically useful traces for hot-path triage.
# ----------------------------------------------------------------------------
"""
    main(; N = 10, sample_rate = 0.01)

Profile Ipopt and then MadNLP on the bilinear toy problem via `profile_solver`
and print where each profile was saved.

# Keywords
- `N`, `sample_rate`: forwarded unchanged to both `profile_solver` calls.

# Returns
A named tuple `(ipopt = ..., madnlp = ...)` holding the two profiles.
"""
function main(; N = 10, sample_rate = 0.01)
    # Fresh option objects are constructed for every solve (warmup and profiled).
    make_ipopt_options() = IpoptOptions(max_iter = 50, print_level = 0)
    make_madnlp_options() =
        MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = Int(MadNLP.ERROR))

    ipopt_profile, ipopt_path = profile_solver(;
        solver_name = "Ipopt",
        options_ctor = make_ipopt_options,
        N = N,
        sample_rate = sample_rate,
    )

    madnlp_profile, madnlp_path = profile_solver(;
        solver_name = "MadNLP",
        options_ctor = make_madnlp_options,
        N = N,
        sample_rate = sample_rate,
    )

    println("\nDone.")
    println(" Ipopt profile: $(ipopt_path) ($(ipopt_profile.total_count) samples)")
    println(" MadNLP profile: $(madnlp_path) ($(madnlp_profile.total_count) samples)")
    return (ipopt = ipopt_profile, madnlp = madnlp_profile)
end

# Run the profile only when this file is the program being executed, not when
# it is `include`d from elsewhere.
abspath(PROGRAM_FILE) == @__FILE__() && main()
136 changes: 136 additions & 0 deletions benchmark/analyze_allocs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
using HarmoniqsBenchmarks
using Printf

# Directory scanned when no path is given on the command line.
const DEFAULT_RESULTS_DIR = joinpath(@__DIR__, "results", "allocs")

"""
    results_dir()

Return the first command-line argument if there is one, otherwise
`DEFAULT_RESULTS_DIR`.
"""
function results_dir()
    return get(ARGS, 1, DEFAULT_RESULTS_DIR)
end

# Noise filters — frames / types from Profile.Allocs itself or the Julia
# toplevel/runtime that do not tell us anything about user-code hotpaths.
# Every entry is matched as a plain substring of the frame text.
const NOISE_FRAME_PATTERNS = [
    "Profile.Allocs",
    "gc-alloc-profiler",
    "gc-stock.c",
    "gc.c:",
    "jl_apply",
    "jl_toplevel_",
    "ijl_toplevel_",
    "jl_interpret_toplevel_thunk",
    "jl_repl_entrypoint",
    "interpreter.c",
    "_include(",
    "include_string(",
    "loading.jl",
    "client.jl",
    "_start() at sys.so",
    "ip:0x",
    "_start at ",
    " at Base.jl:",
    "true_main at jlapi.c",
    "__libc_start_main",
    "loader_exe.c",
    "jl_system_image_data",
    "macro expansion at Allocs.jl",
    "boot.jl:",
    "jl_f__call_latest",
]

# Substrings identifying frames of the profiling harness rather than the
# solver code being measured.
const WRAPPER_FRAME_PATTERNS = [
    "alloc_profile.jl",
    "benchmark_memory!",
    "HarmoniqsBenchmarks",
]

# Substrings identifying allocated type names that are skipped entirely.
const NOISE_TYPE_PATTERNS = [
    "Profile.Allocs",
]

# `occursin(haystack)` yields a predicate `needle -> occursin(needle, haystack)`,
# so each helper asks "does any pattern occur in this string?".

"Return `true` when frame text `f` contains any entry of `NOISE_FRAME_PATTERNS`."
_is_noise_frame(f) = any(occursin(f), NOISE_FRAME_PATTERNS)

"Return `true` when type name `t` contains any entry of `NOISE_TYPE_PATTERNS`."
_is_noise_type(t) = any(occursin(t), NOISE_TYPE_PATTERNS)

"Return `true` when frame text `f` contains any entry of `WRAPPER_FRAME_PATTERNS`."
_is_wrapper_frame(f) = any(occursin(f), WRAPPER_FRAME_PATTERNS)

"""
    _first_user_frame(stack)

Return the first frame of `stack` that is neither a noise frame nor a wrapper
frame. If every frame is filtered out, return the last frame of `stack`; for an
empty stack return the placeholder `"<empty>"`.
"""
function _first_user_frame(stack)
    idx = findfirst(f -> !(_is_noise_frame(f) || _is_wrapper_frame(f)), stack)
    idx === nothing || return stack[idx]
    # Nothing survived the filters: fall back to the final frame, if any.
    return isempty(stack) ? "<empty>" : stack[end]
end

"""
    top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)

Print the `k` stack frames with the most attributed bytes. A sample's bytes are
attributed to every non-noise frame on its stack (inclusive attribution), at
most once per sample.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed byte totals are multiplied by
  `1 / profile.sample_rate`; sample counts are printed unscaled.
- `drop_wrappers`: when `true`, frames matching `WRAPPER_FRAME_PATTERNS` are
  skipped as well.

# Returns
`nothing`; the table is written to `stdout`.
"""
function top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)
    by_frame = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        # Visit each distinct frame once per sample so that a frame recurring in
        # the same stack (recursion) is not charged the same allocation twice.
        for frame in unique(s.stacktrace)
            _is_noise_frame(frame) && continue
            drop_wrappers && _is_wrapper_frame(frame) && continue
            cnt, bytes = get(by_frame, frame, (0, 0))
            by_frame[frame] = (cnt + 1, bytes + s.size_bytes)
        end
    end
    ranked = sort(collect(by_frame); by = x -> -x[2][2])[1:min(k, length(by_frame))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError whenever 1/sample_rate is not integral
    # (e.g. sample_rate = 0.3); only take the integer form when it is exact.
    scale_label = isinteger(scale) ? string(Int(scale)) : @sprintf("%g", scale)
    println("\nTop $(length(ranked)) user frames by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "frame")
    for (frame, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(frame, 140)
    end
    return nothing
end

"""
    top_leaf_callsites(profile; k = 25, scale_to_total = true)

Print the `k` call sites with the most allocated bytes, where each sample is
attributed to exactly one frame: the one selected by `_first_user_frame`.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed byte totals are multiplied by
  `1 / profile.sample_rate`; sample counts are printed unscaled.

# Returns
`nothing`; the table is written to `stdout`.
"""
function top_leaf_callsites(profile; k = 25, scale_to_total = true)
    by_leaf = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        leaf = _first_user_frame(s.stacktrace)
        cnt, bytes = get(by_leaf, leaf, (0, 0))
        by_leaf[leaf] = (cnt + 1, bytes + s.size_bytes)
    end
    ranked = sort(collect(by_leaf); by = x -> -x[2][2])[1:min(k, length(by_leaf))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError whenever 1/sample_rate is not integral
    # (e.g. sample_rate = 0.3); only take the integer form when it is exact.
    scale_label = isinteger(scale) ? string(Int(scale)) : @sprintf("%g", scale)
    println("\nTop $(length(ranked)) leaf call sites by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "leaf")
    for (leaf, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(leaf, 140)
    end
    return nothing
end

"""
    top_types(profile; k = 15, scale_to_total = true)

Print the `k` allocated type names with the most bytes, skipping type names
matched by `_is_noise_type`.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed byte totals are multiplied by
  `1 / profile.sample_rate`; sample counts are printed unscaled.

# Returns
`nothing`; the table is written to `stdout`.
"""
function top_types(profile; k = 15, scale_to_total = true)
    by_type = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        cnt, bytes = get(by_type, s.type_name, (0, 0))
        by_type[s.type_name] = (cnt + 1, bytes + s.size_bytes)
    end
    ranked = sort(collect(by_type); by = x -> -x[2][2])[1:min(k, length(by_type))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError whenever 1/sample_rate is not integral
    # (e.g. sample_rate = 0.3); only take the integer form when it is exact.
    scale_label = isinteger(scale) ? string(Int(scale)) : @sprintf("%g", scale)
    println("\nTop $(length(ranked)) allocated types (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "type")
    for (t, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(t, 120)
    end
    return nothing
end

"""
    _fmt_bytes(b)

Format the byte count `b` as a short string using binary units: two decimals
with a `GB`, `MB` or `KB` suffix from the matching threshold upward, otherwise
the count rounded to a whole number of bytes followed by `B`.
"""
function _fmt_bytes(b)
    kib, mib, gib = 1 << 10, 1 << 20, 1 << 30
    if b >= gib
        return @sprintf("%.2f GB", b / gib)
    elseif b >= mib
        return @sprintf("%.2f MB", b / mib)
    elseif b >= kib
        return @sprintf("%.2f KB", b / kib)
    end
    return @sprintf("%d B", Int(round(b)))
end

"""
    _truncate(s, n)

Return `s` unchanged when it has at most `n` characters; otherwise return its
first `n - 1` characters followed by `…`.
"""
function _truncate(s, n)
    length(s) > n || return s
    return string(first(s, n - 1), "…")
end

"""
    main()

Load every `*_allocs.jld2` file in `results_dir()` and, for each one, print a
summary line followed by the type, leaf call-site and frame rankings.

Prints a message and returns early when the directory does not exist or holds
no matching files.

# Returns
`nothing`.
"""
function main()
    dir = results_dir()
    # `readdir` throws on a missing directory, which is the normal state before
    # any profile has been generated; report it instead of crashing.
    if !isdir(dir)
        println("results directory $dir does not exist — run benchmark/alloc_profile.jl first")
        return nothing
    end
    files = sort(filter(endswith("_allocs.jld2"), readdir(dir; join = true)))
    if isempty(files)
        println("no *_allocs.jld2 files under $dir")
        return nothing
    end
    for path in files
        profile = load_alloc_profile(path)
        println("=" ^ 100)
        println(basename(path))
        @printf " solver=%s N=%d sample_rate=%g samples=%d total=%s\n" profile.solver profile.N profile.sample_rate profile.total_count _fmt_bytes(profile.total_bytes)
        top_types(profile; k = 10)
        top_leaf_callsites(profile; k = 20)
        top_frames(profile; k = 20)
    end
    return nothing
end

# Runs unconditionally: the analysis also executes when this file is `include`d,
# not only when it is run as a script.
main()
Loading
Loading