Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 157 additions & 22 deletions .claude/skills/kernel-trace-analysis/scripts/pmc_l2_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
"""
PMC L2 / HBM efficiency analyzer.

Parses rocprofv3 PMC counter-collection CSV(s) and reports L2 cache behaviour
Parses rocprofv3 PMC counter-collection output and reports L2 cache behaviour
and HBM read efficiency for a kernel. Complements hotspot_analyzer.py, which
reads ATT instruction timing (no cache counters).

Supported input formats:
CSV : ``*_counter_collection.csv`` produced by older rocprofv3 or manually
exported from a rocpd DB. Expected columns:
Dispatch_Id, Kernel_Name, Counter_Name, Counter_Value
rocpd: SQLite DB (``*.db``) written by ``rocprofv3 --pmc`` on ROCm >= 7.x.
Tables are discovered automatically by UUID suffix:
rocpd_info_kernel_symbol_<uuid>
rocpd_kernel_dispatch_<uuid>
rocpd_info_pmc_<uuid>
rocpd_pmc_event_<uuid>

Counters expected (collect via capture-kernel-trace "PMC mode"):
L2 hit rate: TCC_HIT_sum, TCC_MISS_sum, TCC_REQ_sum
line utilization: TCC_EA0_RDREQ_sum, TCC_EA0_RDREQ_32B_sum
HBM traffic: TCC_EA0_RDREQ_DRAM_sum
L1->L2: TCP_TCC_READ_REQ_sum

Usage:
python pmc_l2_analyzer.py <pmc_csv> [<pmc_csv> ...] \
# CSV path
python pmc_l2_analyzer.py pmc_l2_counter_collection.csv \\
[pmc_ea_counter_collection.csv ...] \\
[--kernel pa_decode_ps_kernel_0] [--ideal-gb 8.59] [--ea-channels 2]

# rocpd DB path (ROCm >= 7.x)
python pmc_l2_analyzer.py pmc_l2_results.db [pmc_ea_results.db ...] \\
--kernel pa_decode_ps_kernel_0

Interpretation:
L2 hit rate : HIT/(HIT+MISS). For decode with independent per-sequence
paged KV there is no inter-CTA reuse, so ~1-3% is EXPECTED
Expand All @@ -30,46 +47,164 @@

import argparse
import csv
import re
import sqlite3
from collections import defaultdict


def load_counters(paths, kernel):
def _load_counters_from_csv(path, kernel):
    """Load PMC counters from a rocprofv3 CSV counter-collection file.

    Rows are read with ``csv.DictReader`` using the columns ``Dispatch_Id``,
    ``Kernel_Name``, ``Counter_Name`` and ``Counter_Value``.

    Args:
        path: CSV file to read.
        kernel: Substring that ``Kernel_Name`` must contain (case-sensitive).
            A falsy value disables the filter.

    Returns:
        ``(agg, dispatches)`` where ``agg`` is a ``defaultdict(float)`` mapping
        counter name to the sum of its values over the matched rows, and
        ``dispatches`` is the set of ``Dispatch_Id`` strings of the rows that
        contributed a value.

    Raises:
        ValueError: if a non-empty ``Counter_Value`` cannot be parsed as float.
    """
    agg = defaultdict(float)
    dispatches = set()
    # The csv module requires newline="" so that quoted fields containing
    # embedded newlines are parsed correctly.
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            # DictReader fills the missing fields of a short row with None;
            # normalise to "" so the substring test below cannot raise.
            kernel_name = row.get("Kernel_Name") or ""
            if kernel and kernel not in kernel_name:
                continue
            name = row.get("Counter_Name")
            value = row.get("Counter_Value")
            if not name or value in (None, ""):
                continue
            agg[name] += float(value)
            # Only count dispatches that are actually identified; a missing
            # column must not add a phantom ``None`` dispatch.
            dispatch_id = row.get("Dispatch_Id")
            if dispatch_id is not None:
                dispatches.add(dispatch_id)
    return agg, dispatches


def _load_counters_from_db(path, kernel):
    """Load PMC counters from a rocpd SQLite DB (rocprofv3 >= ROCm 7.x).

    rocprofv3 writes UUID-suffixed tables; the UUID is discovered by searching
    for ``rocpd_pmc_event_*`` (the first match in sorted order is used).
    Required tables and the columns this function reads:

        rocpd_info_kernel_symbol_<uuid> : id | name
        rocpd_kernel_dispatch_<uuid>    : id | kernel_symbol_id
        rocpd_info_pmc_<uuid>           : id | name
        rocpd_pmc_event_<uuid>          : pmc_id | dispatch_id | value

    Args:
        path: Path to the ``.db`` file. It is opened read-only.
        kernel: Substring the kernel symbol name must contain. The match is
            case-sensitive and literal, like the CSV loader's ``in`` test.
            A falsy value selects every dispatch.

    Returns:
        The same ``(agg, dispatches)`` pair as the CSV path: ``agg`` is a
        ``defaultdict(float)`` mapping counter name to the summed value, and
        ``dispatches`` is the set of matched dispatch ids. Both are empty when
        no ``rocpd_pmc_event_*`` table exists, when a required table is
        missing (a warning is printed), or when no dispatch matches.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    def _quote(identifier):
        # Table names come from sqlite_master; quote them as identifiers so
        # unusual UUID characters cannot break the generated SQL.
        return '"' + identifier.replace('"', '""') + '"'

    agg = defaultdict(float)
    dispatches = set()

    # as_uri() percent-encodes characters such as '#', '?' and '%' that would
    # otherwise be parsed as URI syntax and make SQLite open the wrong file.
    uri = Path(path).resolve().as_uri() + "?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        cur = conn.cursor()
        tables = {
            row[0]
            for row in cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        }

        # Discover UUID from rocpd_pmc_event_<uuid>
        uuid = None
        for table in sorted(tables):
            m = re.match(r"rocpd_pmc_event_(.+)", table)
            if m:
                uuid = m.group(1)
                break
        if uuid is None:
            return agg, dispatches

        sym_t = f"rocpd_info_kernel_symbol_{uuid}"
        disp_t = f"rocpd_kernel_dispatch_{uuid}"
        pmc_info_t = f"rocpd_info_pmc_{uuid}"
        pmc_event_t = f"rocpd_pmc_event_{uuid}"

        required = {sym_t, disp_t, pmc_info_t, pmc_event_t}
        if not required.issubset(tables):
            missing = required - tables
            print(f"  Warning: rocpd DB missing tables: {', '.join(sorted(missing))}")
            return agg, dispatches

        # Resolve dispatch ids matching the kernel filter. instr() is a
        # literal, case-sensitive substring test; LIKE would treat '_' and '%'
        # in the kernel name as wildcards and ignore ASCII case.
        if kernel:
            dispatch_sql = (
                f"SELECT d.id FROM {_quote(disp_t)} d"
                f" JOIN {_quote(sym_t)} s ON d.kernel_symbol_id = s.id"
                " WHERE instr(s.name, ?) > 0"
            )
            params = (kernel,)
        else:
            dispatch_sql = f"SELECT id FROM {_quote(disp_t)}"
            params = ()

        dispatches = {row[0] for row in cur.execute(dispatch_sql, params)}
        if not dispatches:
            return agg, dispatches

        # Aggregate counter values for the matched dispatches. The dispatch
        # selection is embedded as a subquery rather than expanded into one
        # bound variable per id, which would exceed SQLite's host-parameter
        # limit on traces with many dispatches.
        events_sql = (
            "SELECT p.name, e.value"
            f" FROM {_quote(pmc_event_t)} e"
            f" JOIN {_quote(pmc_info_t)} p ON e.pmc_id = p.id"
            f" WHERE e.dispatch_id IN ({dispatch_sql})"
        )
        for name, value in cur.execute(events_sql, params):
            if value is None:
                # NULL samples carry no data; float(None) would raise.
                continue
            agg[name] += float(value)
    finally:
        conn.close()

    return agg, dispatches


def load_counters(paths, kernel):
    """Load and aggregate PMC counters from one or more CSV or rocpd DB files.

    Each path is handled independently based on its extension:
      ``.db`` (any letter case) -> rocpd SQLite DB (rocprofv3 >= ROCm 7.x)
      anything else -> CSV with columns Dispatch_Id/Kernel_Name/Counter_Name/Counter_Value

    Args:
        paths: Iterable of input file paths (``str`` or ``os.PathLike``).
        kernel: Substring filter on the kernel name, forwarded to the loaders;
            a falsy value disables filtering.

    Returns:
        ``(agg, n_dispatches)`` where ``agg`` maps counter name to the total
        value summed over all matched dispatches and files, and
        ``n_dispatches`` is the number of distinct dispatch ids seen across
        all inputs.

    Side effects:
        Prints the set of input formats that were used, if any.
    """
    agg = defaultdict(float)
    all_dispatches = set()
    fmt_used = set()

    for p in paths:
        # str() lets pathlib.Path inputs through; lower() routes "RESULTS.DB"
        # to the SQLite reader instead of mis-parsing it as CSV.
        if str(p).lower().endswith(".db"):
            sub_agg, sub_disp = _load_counters_from_db(p, kernel)
            fmt_used.add("rocpd")
        else:
            sub_agg, sub_disp = _load_counters_from_csv(p, kernel)
            fmt_used.add("csv")
        for name, value in sub_agg.items():
            agg[name] += value
        all_dispatches |= sub_disp

    if fmt_used:
        print(f"  Input format(s): {', '.join(sorted(fmt_used))}")
    return agg, len(all_dispatches)


def main():
ap = argparse.ArgumentParser(description="PMC L2/HBM efficiency analyzer")
ap.add_argument("csv", nargs="+", help="pmc *_counter_collection.csv file(s)")
ap.add_argument(
"inputs",
nargs="+",
metavar="FILE",
help="PMC counter file(s): *_counter_collection.csv or *.db (rocpd SQLite)",
)
ap.add_argument("--kernel", default="", help="substring filter on Kernel_Name")
ap.add_argument("--ideal-gb", type=float, default=0.0,
help="ideal HBM read bytes per dispatch in GB (for over-fetch ratio)")
ap.add_argument("--ea-channels", type=int, default=2,
help="EA interfaces to scale single-channel EA0 counters by (default 2)")
ap.add_argument(
"--ideal-gb",
type=float,
default=0.0,
help="ideal HBM read bytes per dispatch in GB (for over-fetch ratio)",
)
ap.add_argument(
"--ea-channels",
type=int,
default=2,
help="EA interfaces to scale single-channel EA0 counters by (default 2)",
)
args = ap.parse_args()

agg, ndisp = load_counters(args.csv, args.kernel)
agg, ndisp = load_counters(args.inputs, args.kernel)
if not agg:
print("No matching counter rows found.")
return 1

print(f" Dispatches matched: {ndisp}")
hit = agg.get("TCC_HIT_sum", 0)
miss = agg.get("TCC_MISS_sum", 0)
req = agg.get("TCC_REQ_sum", 0)
ea = agg.get("TCC_EA0_RDREQ_sum", 0)
ea32 = agg.get("TCC_EA0_RDREQ_32B_sum", 0)
dram = agg.get("TCC_EA0_RDREQ_DRAM_sum", 0)
Expand Down