diff --git a/instrumentation/srcu/README.md b/instrumentation/srcu/README.md new file mode 100644 index 00000000000000..f93673b3c2ac91 --- /dev/null +++ b/instrumentation/srcu/README.md @@ -0,0 +1,44 @@ +SRCU Instrumentation Bundle +=========================== + +Contents +-------- + +- bpftrace_srcu.bt: Full bpftrace script logging key SRCU updates. +- bpftrace_srcu_min.bt: Minimal bpftrace focused on two core events. +- tracepoints_srcu.patch: Optional kernel tracepoints header to add structured events. +- gdb_guide.txt: GDB observation flow and confirmation checklist. + +Usage - bpftrace +---------------- + +Prereqs: CONFIG_KPROBES, CONFIG_BPF, CONFIG_DEBUG_INFO_BTF=y, bpftrace installed. + +Run full script: + + sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu.bt + +Run minimal script: + + sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu_min.bt + +Expected key observations: + +- srcu_gp_end: sup->srcu_gp_seq_needed_exp >= current gp_seq. +- srcu_funnel_gp_start: normal path raises sup->srcu_gp_seq_needed; expedited also raises sup->srcu_gp_seq_needed_exp. +- srcu_funnel_exp_start: per-node snp->srcu_gp_seq_needed_exp bumps along the path. 
+ +Usage - tracepoints (optional) +------------------------------ + +Apply the patch to add include/trace/events/srcu.h, wire it into the build, and rebuild the kernel: + + git apply /workspace/instrumentation/srcu/tracepoints_srcu.patch + # include the new header from kernel/rcu/srcutree.c and set TRACE_INCLUDE_PATH as needed + # then rebuild and boot the kernel + +Record: + + sudo trace-cmd record -e srcu:srcu_sup_needed -e srcu:srcu_node_needed_exp -e srcu:srcu_sdp_needed + sudo trace-cmd report + diff --git a/instrumentation/srcu/bpftrace_srcu.bt b/instrumentation/srcu/bpftrace_srcu.bt new file mode 100644 index 00000000000000..c5c8df30bf17ee --- /dev/null +++ b/instrumentation/srcu/bpftrace_srcu.bt @@ -0,0 +1,84 @@ +#!/usr/bin/env bpftrace + +/* + * bpftrace instrumentation for SRCU needed/needed_exp updates + * + * Requirements: + * - Kernel with BTF (CONFIG_DEBUG_INFO_BTF=y) and kprobes + * - bpftrace installed + * + * Usage: + * sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu.bt + * + * What it prints: + * - srcu_gp_end(): gp end and sup->needed/needed_exp values + * - srcu_funnel_gp_start(): per-request s, do_norm, and sup->needed/needed_exp + * - srcu_funnel_exp_start(): per-node snp->needed_exp propagation + * - srcu_gp_start_if_needed(): returned cookie s (printed only when non-zero) + */ + +/* Note: Use %p to print pointers; avoid custom inline conversions. 
 */ +kprobe:srcu_gp_end +{ + $ssp = (struct srcu_struct *)arg0; + $sup = $ssp->srcu_sup; + printf("gp_end ssp=%p gp_seq=%lu need=%lu need_exp=%lu\n", + $ssp, $sup->srcu_gp_seq, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp); +} + +kprobe:srcu_funnel_gp_start +{ + $ssp = (struct srcu_struct *)arg0; + $sdp = (struct srcu_data *)arg1; /* may be NULL; printed as a pointer only, never dereferenced */ + $s = (unsigned long)arg2; + $do_norm = (int)arg3; + $sup = $ssp->srcu_sup; + printf("funnel_gp_start ssp=%p s=%lu do_norm=%d sup.need=%lu sup.need_exp=%lu sdp=%p\n", + $ssp, $s, $do_norm, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp, $sdp); +} + +kprobe:srcu_funnel_exp_start +{ + $ssp = (struct srcu_struct *)arg0; + $snp = (struct srcu_node *)arg1; /* may be NULL if small; checked before dereference below */ + $s = (unsigned long)arg2; + if ($snp) { + printf("funnel_exp_start(before) ssp=%p s=%lu snp=%p snp.need_exp=%lu\n", + $ssp, $s, $snp, $snp->srcu_gp_seq_needed_exp); + } else { + printf("funnel_exp_start(before) ssp=%p s=%lu snp=NULL\n", $ssp, $s); + } +} + +kretprobe:srcu_funnel_exp_start +{ + /* Placeholder: prints nothing. Matching the entry probe's snp would need state saved at entry. */ + $ssp = (struct srcu_struct *)retval /* bogus cast (per original author): retval is not a real srcu_struct pointer and $ssp is never used */; +} + +kprobe:srcu_gp_start_if_needed +{ + $ssp = (struct srcu_struct *)arg0; + $rhp = (struct rcu_head *)arg1; /* may be NULL */ + $do_norm = (int)arg2; + /* Nothing is printed at entry: s is computed inside the function; the kretprobe below prints the returned cookie */ +} + +kretprobe:srcu_gp_start_if_needed +{ + $ret = (unsigned long)retval; /* cookie s */ + if ($ret != 0) { + printf("gp_start_if_needed ret_s=%lu\n", $ret); + } +} + +/* Optional: observe per-CPU wrap guard at gp_end loop by sampling sdp fields occasionally. 
 */ +kprobe:srcu_gp_end +{ + $ssp = (struct srcu_struct *)arg0; + $cpu = 0; + $samples = 0; + /* TODO: lightweight sampling of a few CPUs if possible (not implemented; this probe prints nothing) */ +} + diff --git a/instrumentation/srcu/bpftrace_srcu_min.bt b/instrumentation/srcu/bpftrace_srcu_min.bt new file mode 100644 index 00000000000000..3326dbb996c344 --- /dev/null +++ b/instrumentation/srcu/bpftrace_srcu_min.bt @@ -0,0 +1,22 @@ +#!/usr/bin/env bpftrace + +/* Minimal script focusing on two core events */ + +kprobe:srcu_gp_end +{ + $ssp = (struct srcu_struct *)arg0; + $sup = $ssp->srcu_sup; + printf("gp_end gp_seq=%lu need=%lu need_exp=%lu\n", + $sup->srcu_gp_seq, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp); +} + +kprobe:srcu_funnel_gp_start +{ + $ssp = (struct srcu_struct *)arg0; + $s = (unsigned long)arg2; + $do_norm = (int)arg3; + $sup = $ssp->srcu_sup; + printf("funnel s=%lu do_norm=%d sup.need=%lu sup.need_exp=%lu\n", + $s, $do_norm, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp); +} + diff --git a/instrumentation/srcu/gdb_guide.txt b/instrumentation/srcu/gdb_guide.txt new file mode 100644 index 00000000000000..954b77c73628ba --- /dev/null +++ b/instrumentation/srcu/gdb_guide.txt @@ -0,0 +1,97 @@ +GDB Guide: Observing SRCU needed/needed_exp Behavior +==================================================== + +Prereqs +------- +- vmlinux with symbols, plus a debug target: kgdb, qemu -s/-S, or a kdump vmcore (post-mortem only; breakpoints need a live target). +- Debug info available for kernel/rcu/srcutree.c and include/linux/srcutree.h types. + +Core Functions / Fields +----------------------- +- Global/state: struct srcu_usage (ssp->srcu_sup) + - sup->srcu_gp_seq, sup->srcu_gp_seq_needed, sup->srcu_gp_seq_needed_exp +- Per-node: struct srcu_node + - snp->srcu_gp_seq_needed_exp +- Per-CPU: struct srcu_data + - sdp->srcu_gp_seq_needed, sdp->srcu_gp_seq_needed_exp + +Key Probe Points +---------------- +1) srcu_gp_end(struct srcu_struct *ssp) + - After rcu_seq_end(), check sup->srcu_gp_seq and sup->srcu_gp_seq_needed_exp. 
+ - Expect: sup->srcu_gp_seq_needed_exp >= rcu_seq_current(&sup->srcu_gp_seq). + + gdb: + break srcu_gp_end + c + p/x ssp->srcu_sup->srcu_gp_seq + p/x ssp->srcu_sup->srcu_gp_seq_needed + p/x ssp->srcu_sup->srcu_gp_seq_needed_exp + +2) srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm) + - On entry/just before return, inspect s, do_norm, and sup->needed/needed_exp. + - Normal (do_norm==true): sup->srcu_gp_seq_needed >= s (stored with release semantics inside the function). + - Expedited (do_norm==false): also sup->srcu_gp_seq_needed_exp >= s. + + gdb: + break srcu_funnel_gp_start + c + p/x s + p do_norm + p/x ssp->srcu_sup->srcu_gp_seq_needed + p/x ssp->srcu_sup->srcu_gp_seq_needed_exp + +3) srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s) + - Along the path to root, snp->srcu_gp_seq_needed_exp is bumped to s if it is < s or invalid. + + gdb: + break srcu_funnel_exp_start + commands + silent + printf "exp_start s=%lx snp=%p need_exp(before)=%lx\n", (unsigned long)s, snp, snp ? snp->srcu_gp_seq_needed_exp : 0 + c + end + +4) srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rhp, bool do_norm) + - Returns cookie s. Per-CPU sdp updates happen under lock. + + gdb: + break srcu_gp_start_if_needed + break *srcu_gp_start_if_needed+200 # near function end, adjust by disassembly + c + finish + p/x $rax # cookie s on x86-64; use your arch's return-value register (e.g. $x0 on arm64, $r0 on arm) + +Per-CPU Observation +------------------- +Map CPU->sdp and inspect fields: + + set $ssp = (struct srcu_struct *)ADDR # ADDR: address of the srcu_struct to inspect
+ p/x $ssp->sda + # For CPU i: + set $sdp = per_cpu_ptr($ssp->sda, i) # if gdb has helper; otherwise compute by percpu offset + p/x $sdp->srcu_gp_seq_needed + p/x $sdp->srcu_gp_seq_needed_exp + +Confirmation Checklist +---------------------- +- Normal GP requests: + [ ] sup->srcu_gp_seq_needed is raised to >= s + [ ] sup->srcu_gp_seq_needed_exp unchanged unless GP end aligns it >= gpseq + [ ] No srcu_node->srcu_gp_seq_needed_exp updates on this path + +- Expedited requests: + [ ] sup->srcu_gp_seq_needed_exp raised to >= s + [ ] Path snp->srcu_gp_seq_needed_exp raised along leaf->root + [ ] sup->srcu_gp_seq_needed may also be >= s if it was lower + +- At GP end: + [ ] sup->srcu_gp_seq_needed_exp >= rcu_seq_current(&sup->srcu_gp_seq) + [ ] Per-CPU sdp->srcu_gp_seq_needed{,_exp} occasionally synced forward (wrap guard) + +Tips +---- +- Use "disassemble srcu_funnel_gp_start" to find a suitable near-return probe. +- If symbols are stripped in modules, run in a VM with full vmlinux. +- On 32-bit, compare with ULONG_CMP logic in code; in gdb, unsigned arithmetic matches C semantics. + diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c5e8ebc493d5ee..188a785a5aa0e7 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -26,6 +26,7 @@ #include