Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions instrumentation/srcu/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
SRCU Instrumentation Bundle
===========================

Contents
--------

- bpftrace_srcu.bt: Full bpftrace script logging key SRCU updates.
- bpftrace_srcu_min.bt: Minimal bpftrace focused on two core events.
- tracepoints_srcu.patch: Optional kernel tracepoints header to add structured events.
- gdb_guide.txt: GDB observation flow and confirmation checklist.

Usage - bpftrace
----------------

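Prereqs: a kernel built with CONFIG_KPROBES=y, CONFIG_BPF=y, CONFIG_BPF_SYSCALL=y, CONFIG_BPF_EVENTS=y and CONFIG_DEBUG_INFO_BTF=y, plus bpftrace installed. The probed functions are static; if the compiler inlined one of them, its kprobe cannot attach.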

Run full script:

sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu.bt

Run minimal script:

sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu_min.bt

Expected key observations:

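- srcu_gp_end: once the grace period has been ended (after rcu_seq_end()), sup->srcu_gp_seq_needed_exp >= rcu_seq_current(&sup->srcu_gp_seq). Values sampled at function entry predate this update.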
- srcu_funnel_gp_start: normal path raises sup->srcu_gp_seq_needed; expedited also raises sup->srcu_gp_seq_needed_exp.
- srcu_funnel_exp_start: per-node snp->srcu_gp_seq_needed_exp bumps along the path.

Usage - tracepoints (optional)
------------------------------

Apply the patch, which adds the tracepoint header include/trace/events/srcu.h, hook it into the build, and rebuild the kernel:

git apply /workspace/instrumentation/srcu/tracepoints_srcu.patch
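# make sure exactly one C file (e.g. kernel/rcu/srcutree.c) defines CREATE_TRACE_POINTS before including <trace/events/srcu.h>; set TRACE_INCLUDE_PATH only if the header lives outside include/trace/events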
# then rebuild and boot the kernel

Record:

sudo trace-cmd record -e srcu:srcu_sup_needed -e srcu:srcu_node_needed_exp -e srcu:srcu_sdp_needed
sudo trace-cmd report

84 changes: 84 additions & 0 deletions instrumentation/srcu/bpftrace_srcu.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env bpftrace

/*
* bpftrace instrumentation for SRCU needed/needed_exp updates
*
* Requirements:
* - Kernel with BTF (CONFIG_DEBUG_INFO_BTF=y) and kprobes
* - bpftrace installed
*
* Usage:
* sudo bpftrace /workspace/instrumentation/srcu/bpftrace_srcu.bt
*
* What it prints:
* - srcu_gp_end(): gp end and sup->needed/needed_exp values
* - srcu_funnel_gp_start(): per-request s, do_norm, and sup->needed/needed_exp
* - srcu_funnel_exp_start(): per-node snp->needed_exp propagation
* - srcu_gp_start_if_needed(): the returned cookie s (per-CPU sdp->needed/needed_exp are not read by this script)
*/

/* Note: Use %p to print pointers; avoid custom inline conversions. */

/*
 * srcu_gp_end(): print the srcu_usage sequence numbers on entry and on return.
 *
 * The entry sample is taken before the function body has run, so it cannot
 * show the need_exp fix-up performed inside. The return sample is taken
 * after it. kretprobes cannot read arguments, so ssp is stashed per thread.
 *
 * NOTE(review): the function may let a new grace period start before it
 * returns, so gp_seq in the return sample can already have moved on.
 */
kprobe:srcu_gp_end
{
    $ssp = (struct srcu_struct *)arg0;
    $sup = $ssp->srcu_sup;
    @gp_end_ssp[tid] = arg0;
    printf("gp_end ssp=%p gp_seq=%lu need=%lu need_exp=%lu\n",
        $ssp, $sup->srcu_gp_seq, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp);
}

kretprobe:srcu_gp_end
/@gp_end_ssp[tid]/
{
    $ssp = (struct srcu_struct *)@gp_end_ssp[tid];
    $sup = $ssp->srcu_sup;
    printf("gp_end(ret) ssp=%p gp_seq=%lu need=%lu need_exp=%lu\n",
        $ssp, $sup->srcu_gp_seq, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp);
    /* Drop the per-thread entry so the map does not grow. */
    delete(@gp_end_ssp[tid]);
}

/*
 * srcu_funnel_gp_start(ssp, sdp, s, do_norm): print the request and the
 * srcu_usage needed/needed_exp values as they are at function entry,
 * i.e. before this call has had a chance to raise them.
 */
kprobe:srcu_funnel_gp_start
{
    $ssp = (struct srcu_struct *)arg0;
    $sdp = (struct srcu_data *)arg1; /* may be NULL */
    /* argN is already a 64-bit unsigned value; bpftrace has no "unsigned long" cast. */
    $s = arg2;
    /* do_norm is a bool: only the low byte of the register is meaningful. */
    $do_norm = (uint8)arg3;
    $sup = $ssp->srcu_sup;
    printf("funnel_gp_start ssp=%p s=%lu do_norm=%d sup.need=%lu sup.need_exp=%lu sdp=%p\n",
        $ssp, $s, $do_norm, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp, $sdp);
}

/*
 * srcu_funnel_exp_start(ssp, snp, s): print the starting node's needed_exp
 * before the call and, on return, the same node's needed_exp together with
 * the srcu_usage needed_exp.
 *
 * The function returns void, so retval carries no information; the
 * arguments are stashed per thread at entry and re-read in the kretprobe.
 */
kprobe:srcu_funnel_exp_start
{
    $ssp = (struct srcu_struct *)arg0;
    $snp = (struct srcu_node *)arg1; /* may be NULL if small */
    $s = arg2;
    @fes_ssp[tid] = arg0;
    @fes_snp[tid] = arg1;
    @fes_s[tid] = arg2;
    if (arg1 != 0) {
        printf("funnel_exp_start(before) ssp=%p s=%lu snp=%p snp.need_exp=%lu\n",
            $ssp, $s, $snp, $snp->srcu_gp_seq_needed_exp);
    } else {
        printf("funnel_exp_start(before) ssp=%p s=%lu snp=NULL\n", $ssp, $s);
    }
}

kretprobe:srcu_funnel_exp_start
/@fes_ssp[tid]/
{
    $ssp = (struct srcu_struct *)@fes_ssp[tid];
    $snp = (struct srcu_node *)@fes_snp[tid];
    $s = @fes_s[tid];
    $sup = $ssp->srcu_sup;
    if (@fes_snp[tid] != 0) {
        printf("funnel_exp_start(after) ssp=%p s=%lu snp=%p snp.need_exp=%lu sup.need_exp=%lu\n",
            $ssp, $s, $snp, $snp->srcu_gp_seq_needed_exp, $sup->srcu_gp_seq_needed_exp);
    } else {
        printf("funnel_exp_start(after) ssp=%p s=%lu snp=NULL sup.need_exp=%lu\n",
            $ssp, $s, $sup->srcu_gp_seq_needed_exp);
    }
    /* Drop the per-thread entries so the maps do not grow. */
    delete(@fes_ssp[tid]);
    delete(@fes_snp[tid]);
    delete(@fes_s[tid]);
}

/*
 * srcu_gp_start_if_needed(ssp, rhp, do_norm): report the returned cookie s
 * together with the ssp and do_norm it was requested for.
 *
 * s is computed inside the function, so nothing is printed at entry; the
 * entry probe only stashes the arguments for the kretprobe.
 */
kprobe:srcu_gp_start_if_needed
{
    @gsin_ssp[tid] = arg0;
    /* do_norm is a bool: only the low byte of the register is meaningful. */
    @gsin_norm[tid] = (uint8)arg2;
}

kretprobe:srcu_gp_start_if_needed
/@gsin_ssp[tid]/
{
    $ret = retval; /* cookie s */
    if ($ret != 0) {
        printf("gp_start_if_needed ret_s=%lu ssp=%p do_norm=%d\n",
            $ret, (struct srcu_struct *)@gsin_ssp[tid], @gsin_norm[tid]);
    }
    /* Drop the per-thread entries so the maps do not grow. */
    delete(@gsin_ssp[tid]);
    delete(@gsin_norm[tid]);
}

/*
 * Optional: observe per-CPU wrap guard at gp_end loop by sampling sdp fields occasionally.
 *
 * Placeholder only: this probe assigns three scratch variables and prints
 * nothing. No per-CPU srcu_data is read here.
 * NOTE(review): an implementation would need to turn ssp->sda into a per-CPU
 * pointer for a chosen CPU - TODO implement or drop this probe.
 */
kprobe:srcu_gp_end
{
$ssp = (struct srcu_struct *)arg0;
/* Intended CPU index and sample counter; neither is used yet. */
$cpu = 0;
$samples = 0;
/* Lightweight sampling of a few CPUs if possible */
}

22 changes: 22 additions & 0 deletions instrumentation/srcu/bpftrace_srcu_min.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bpftrace

/* Minimal script focusing on two core events */

/*
 * gp_end: dump the three srcu_usage sequence counters.
 * Values are sampled at function entry.
 */
kprobe:srcu_gp_end
{
    $usage = ((struct srcu_struct *)arg0)->srcu_sup;
    $seq = $usage->srcu_gp_seq;
    $need = $usage->srcu_gp_seq_needed;
    $need_exp = $usage->srcu_gp_seq_needed_exp;
    printf("gp_end gp_seq=%lu need=%lu need_exp=%lu\n", $seq, $need, $need_exp);
}

/*
 * funnel: print the requested s, do_norm, and the srcu_usage
 * needed/needed_exp values as they are at function entry.
 */
kprobe:srcu_funnel_gp_start
{
    $ssp = (struct srcu_struct *)arg0;
    /* argN is already a 64-bit unsigned value; bpftrace has no "unsigned long" cast. */
    $s = arg2;
    /* do_norm is a bool: only the low byte of the register is meaningful. */
    $do_norm = (uint8)arg3;
    $sup = $ssp->srcu_sup;
    printf("funnel s=%lu do_norm=%d sup.need=%lu sup.need_exp=%lu\n",
        $s, $do_norm, $sup->srcu_gp_seq_needed, $sup->srcu_gp_seq_needed_exp);
}

97 changes: 97 additions & 0 deletions instrumentation/srcu/gdb_guide.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
GDB Guide: Observing SRCU needed/needed_exp Behavior
====================================================

Prereqs
-------
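- vmlinux with symbols, plus one way to reach the kernel: kgdb, QEMU's gdbstub (qemu -s -S), or a kdump vmcore for post-mortem inspection (a vmcore is not live, so breakpoints are unavailable and only the field reads below apply).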
- Debug info available for kernel/rcu/srcutree.c and include/linux/srcutree.h types.

Core Functions / Fields
-----------------------
- Global/state: struct srcu_usage (ssp->srcu_sup)
- sup->srcu_gp_seq, sup->srcu_gp_seq_needed, sup->srcu_gp_seq_needed_exp
- Per-node: struct srcu_node
- snp->srcu_gp_seq_needed_exp
- Per-CPU: struct srcu_data
- sdp->srcu_gp_seq_needed, sdp->srcu_gp_seq_needed_exp

Key Probe Points
----------------
1) srcu_gp_end(struct srcu_struct *ssp)
- After rcu_seq_end(), check sup->srcu_gp_seq and sup->srcu_gp_seq_needed_exp.
- Expect: sup->srcu_gp_seq_needed_exp >= rcu_seq_current(&sup->srcu_gp_seq).

gdb:
break srcu_gp_end
c
p/x ssp->srcu_sup->srcu_gp_seq
p/x ssp->srcu_sup->srcu_gp_seq_needed
p/x ssp->srcu_sup->srcu_gp_seq_needed_exp

2) srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm)
- On entry/just before return, inspect s, do_norm, and sup->needed/needed_exp.
- Normal (do_norm==true): sup->srcu_gp_seq_needed >= s (release written inside).
- Expedited (do_norm==false): also sup->srcu_gp_seq_needed_exp >= s.

gdb:
break srcu_funnel_gp_start
c
p/x s
p do_norm
p/x ssp->srcu_sup->srcu_gp_seq_needed
p/x ssp->srcu_sup->srcu_gp_seq_needed_exp

3) srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s)
- Along the path to root, snp->srcu_gp_seq_needed_exp bumped if < s or invalid.

gdb:
break srcu_funnel_exp_start
commands
silent
printf "exp_start s=%lx snp=%p need_exp(before)=%lx\n", (unsigned long)s, snp, snp ? snp->srcu_gp_seq_needed_exp : 0
c
end

4) srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rhp, bool do_norm)
- Returns cookie s. Per-CPU sdp updates happen under lock.

gdb:
break srcu_gp_start_if_needed
break *srcu_gp_start_if_needed+200 # +200 is a placeholder: pick an instruction boundary near the function end from "disassemble srcu_gp_start_if_needed"
c
finish
p/x $rax # return-value register: $rax on x86-64, $x0 on arm64, $r0 on 32-bit ARM; this is cookie s

Per-CPU Observation
-------------------
Map CPU->sdp and inspect fields:

set $ssp = (struct srcu_struct *)<address>
p/x $ssp->sda
# For CPU i:
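set $sdp = (struct srcu_data *)((unsigned long)$ssp->sda + __per_cpu_offset[i]) # manual equivalent of per_cpu_ptr($ssp->sda, i) on SMP kernels; per_cpu_ptr() is a macro, so gdb cannot call it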
p/x $sdp->srcu_gp_seq_needed
p/x $sdp->srcu_gp_seq_needed_exp

Confirmation Checklist
----------------------
- Normal GP requests:
[ ] sup->srcu_gp_seq_needed is raised to >= s
[ ] sup->srcu_gp_seq_needed_exp unchanged unless GP end aligns it >= gpseq
[ ] No srcu_node->srcu_gp_seq_needed_exp updates on this path

- Expedited requests:
[ ] sup->srcu_gp_seq_needed_exp raised to >= s
[ ] Path snp->srcu_gp_seq_needed_exp raised along leaf->root
[ ] sup->srcu_gp_seq_needed is also raised to s if it was lower than s

- At GP end:
[ ] sup->srcu_gp_seq_needed_exp >= rcu_seq_current(&sup->srcu_gp_seq)
[ ] Per-CPU sdp->srcu_gp_seq_needed{,_exp} occasionally synced forward (wrap guard)

Tips
----
- Use "disassemble srcu_funnel_gp_start" to find a suitable near-return probe.
- If symbols are stripped in modules, run in a VM with full vmlinux.
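- Sequence numbers can wrap (most readily on 32-bit), so compare them the way the code does, with ULONG_CMP_LT()/ULONG_CMP_GE(), rather than with plain < or >=. gdb's unsigned arithmetic follows C semantics, so the same modular comparison can be evaluated there, e.g. ULONG_CMP_GE(a, b) as "p (~0UL / 2) >= (a - b)" with both operands unsigned long.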

10 changes: 10 additions & 0 deletions kernel/rcu/srcutree.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <trace/events/srcu.h>

#include "rcu.h"
#include "rcu_segcblist.h"
Expand Down Expand Up @@ -905,6 +906,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
trace_srcu_sup_needed(ssp, gpseq, 0);
spin_unlock_irq_rcu_node(sup);
mutex_unlock(&sup->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
Expand Down Expand Up @@ -1000,11 +1002,13 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
return;
}
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
trace_srcu_node_needed_exp(snp, s);
spin_unlock_irqrestore_rcu_node(snp, flags);
}
spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
trace_srcu_sup_needed(ssp, s, 1);
spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}

Expand Down Expand Up @@ -1063,6 +1067,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
sgsne = snp->srcu_gp_seq_needed_exp;
if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
/* expedited path: node-level */
trace_srcu_node_needed_exp(snp, s);
spin_unlock_irqrestore_rcu_node(snp, flags);
}

Expand All @@ -1074,9 +1080,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
* acquire setting up for initialization.
*/
smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/
trace_srcu_sup_needed(ssp, s, do_norm);
}
if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s);
trace_srcu_sup_needed(ssp, s, !do_norm);

/* If grace period not already in progress, start it. */
if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) &&
Expand Down Expand Up @@ -1329,10 +1337,12 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
sdp->srcu_gp_seq_needed = s;
needgp = true;
trace_srcu_sdp_needed(sdp, s, do_norm);
}
if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
sdp->srcu_gp_seq_needed_exp = s;
needexp = true;
trace_srcu_sdp_needed(sdp, s, 1);
}
spin_unlock_irqrestore_rcu_node(sdp, flags);

Expand Down