From 3c81c77ec56ab7e675bfa0b0663f510927a8ae1b Mon Sep 17 00:00:00 2001 From: Loki FastStart Date: Sun, 10 May 2026 04:08:11 +0000 Subject: [PATCH] feat: DISMISS F# protocol for false-positive suppression When the reviewer flags findings the agent disagrees with, the agent can reply 'DISMISS F1: reason'. After 2 dismissals of the same finding, it's suppressed in future review cycles within that session. - Findings are numbered (F1, F2, ...) in review messages - Agent told to use DISMISS F#: reason format - DismissTracker in orchestrator stores dismissed findings - filterSuppressed removes them before display - If all findings suppressed, review auto-passes as LGTM - 14 new unit tests for dismiss module - All 413 tests green --- ARCHITECTURE.md | 1 + CHANGELOG.md | 6 ++ dismiss.test.ts | 117 +++++++++++++++++++++++++++++++++++++++ dismiss.ts | 136 ++++++++++++++++++++++++++++++++++++++++++++++ index.ts | 19 ++++++- message-sender.ts | 2 +- orchestrator.ts | 31 +++++++++++ package.json | 2 +- 8 files changed, 311 insertions(+), 3 deletions(-) create mode 100644 dismiss.test.ts create mode 100644 dismiss.ts diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index e7e3461..781fb7a 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -39,6 +39,7 @@ All arrows mean "imports from". No circular dependencies exist. ▼ ▼ ▼ ▼ ▼ ▼ orchestrator.ts commands.ts message-sender.ts review-display.ts judge.ts | | | + ├── dismiss.ts ├── reviewer.ts (injected) ├── reviewer.ts (types) ├── context.ts (injected) └── logger.ts ├── architect.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b92baec..5803b3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 1.2.0 (2026-05-10) + +### Features + +- **DISMISS F# protocol** for false-positive suppression. Findings are now numbered (F1, F2, ...) in review messages. Agent can reply `DISMISS F1: reason` to mark a finding as intentional. After 2 dismissals of the same finding, it's auto-suppressed in future review cycles. If all findings are suppressed, review passes as LGTM and push is unblocked. + ## 1.0.3 (2026-05-09) ### Changes diff --git a/dismiss.test.ts b/dismiss.test.ts new file mode 100644 index 0000000..2c5a563 --- /dev/null +++ b/dismiss.test.ts @@ -0,0 +1,117 @@ +import { describe, it, expect } from "vitest"; +import { findingKey, numberFindings, parseDismissals, filterSuppressed, DismissTracker } from "./dismiss"; + +describe("findingKey", () => { + it("extracts severity + location from finding line", () => { + const line = '- **Medium:** src/gateway/model.ts:97 — chatId extraction uses wrong index'; + const key = findingKey(line); + expect(key).toContain("medium:src/gateway/model.ts:97"); + }); + + it("handles numbered finding format (F# prefix)", () => { + const line = '- **F1 Medium:** src/foo.ts:10 — something bad'; + const key = findingKey(line); + expect(key).toBe("medium:src/foo.ts:10 — something bad"); + }); + + it("falls back to trimmed line for non-matching format", () => { + const key = findingKey("some random text"); + expect(key).toBe("some random text"); + }); +}); + +describe("numberFindings", () => { + it("numbers finding bullets sequentially", () => { + const text = '- **High:** foo.ts:1 — bug\n- **Low:** bar.ts:2 — nit\nSome other text'; + const { numbered, findings } = numberFindings(text); + expect(numbered).toContain("**F1 High:**"); + expect(numbered).toContain("**F2 Low:**"); + expect(numbered).toContain("Some other text"); + expect(findings).toHaveLength(2); + }); + + it("preserves non-finding lines unchanged", () => { + const text = 'Header\n\n- **Medium:** x.ts:5 — issue\n\nFooter'; + const { numbered } = numberFindings(text); + expect(numbered).toContain("Header"); + expect(numbered).toContain("Footer"); + expect(numbered).toContain("**F1 Medium:**"); + }); +}); + +describe("parseDismissals", () => { + it("parses DISMISS F# with colon separator", () => { + const text = "The chatId extraction is intentional.\nDISMISS F1: intentional design for telegram thread format"; + const dismissals = parseDismissals(text); + expect(dismissals.size).toBe(1); + expect(dismissals.get(1)).toBe("intentional design for telegram thread format"); + }); + + it("parses multiple dismissals", () => { + const text = "DISMISS F1: by design\nDISMISS F2: not a real issue"; + const dismissals = parseDismissals(text); + expect(dismissals.size).toBe(2); + expect(dismissals.get(1)).toBe("by design"); + expect(dismissals.get(2)).toBe("not a real issue"); + }); + + it("parses dash separator", () => { + const text = "DISMISS F3 - already reviewed"; + const dismissals = parseDismissals(text); + expect(dismissals.get(3)).toBe("already reviewed"); + }); + + it("returns empty map when no dismissals", () => { + const text = "I fixed both issues as suggested."; + expect(parseDismissals(text).size).toBe(0); + }); +}); + +describe("filterSuppressed", () => { + it("removes suppressed findings", () => { + const text = '- **High:** foo.ts:1 — bug one\n- **Low:** bar.ts:2 — nit two'; + const suppressed = new Set([findingKey('- **High:** foo.ts:1 — bug one')]); + const result = filterSuppressed(text, suppressed); + expect(result).not.toContain("bug one"); + expect(result).toContain("nit two"); + }); + + it("returns null when all findings suppressed", () => { + const text = '- **High:** foo.ts:1 — bug one'; + const suppressed = new Set([findingKey('- **High:** foo.ts:1 — bug one')]); + expect(filterSuppressed(text, suppressed)).toBeNull(); + }); + + it("returns original when no suppressions", () => { + const text = '- **Low:** x.ts:5 — something'; + expect(filterSuppressed(text, new Set())).toBe(text); + }); +}); + +describe("DismissTracker", () => { + it("tracks dismissals and suppresses after threshold", () => { + const tracker = new DismissTracker(); + const findings = ['- **Medium:** src/foo.ts:10 — bad pattern', '- **Low:** src/bar.ts:5 — nit']; + tracker.setLastFindings(findings); + + // First dismiss + tracker.processDismissals("DISMISS F1: intentional"); + expect(tracker.getSuppressed().size).toBe(0); // threshold is 2 + + // Second dismiss (same finding via new review) + tracker.setLastFindings(findings); + tracker.processDismissals("DISMISS F1: still intentional"); + expect(tracker.getSuppressed().size).toBe(1); + }); + + it("reset clears all state", () => { + const tracker = new DismissTracker(); + tracker.setLastFindings(['- **High:** x.ts:1 — bug']); + tracker.processDismissals("DISMISS F1: nope"); + tracker.processDismissals("DISMISS F1: nope again"); + expect(tracker.getSuppressed().size).toBe(1); + + tracker.reset(); + expect(tracker.getSuppressed().size).toBe(0); + }); +}); diff --git a/dismiss.ts b/dismiss.ts new file mode 100644 index 0000000..2b298e9 --- /dev/null +++ b/dismiss.ts @@ -0,0 +1,136 @@ +/** + * dismiss.ts — Track and suppress dismissed review findings + * + * When the agent responds to a review with "DISMISS F#: reason", + * that finding is tracked. If the same finding appears in a subsequent + * review cycle and has been dismissed >= threshold times, it's suppressed. + * + * Finding identity: severity + file:line + first 60 chars of problem text. + */ + +import { log } from "./logger"; + +export interface DismissedFinding { + key: string; + reason: string; + count: number; +} + +/** How many times a finding must be dismissed before it's auto-suppressed. */ +const SUPPRESS_THRESHOLD = 2; + +/** Extract a stable key from a finding bullet line. */ +export function findingKey(line: string): string { + // Format: - **Severity:** file:line — problem text + const match = line.match(/^\s*-\s*\*\*(?:F\d+\s+)?(\w+):\*\*\s*(.+)/); + if (!match) return line.trim().slice(0, 80); + const severity = match[1].toLowerCase(); + const rest = match[2].trim().slice(0, 60); + return `${severity}:${rest}`; +} + +/** Number findings in review text and return the numbered version + finding list. */ +export function numberFindings(text: string): { numbered: string; findings: string[] } { + const lines = text.split("\n"); + const findings: string[] = []; + let counter = 0; + + const numbered = lines.map(line => { + // Match finding bullets: - **Severity:** ... + const match = line.match(/^(\s*-\s*)\*\*(\w+):\*\*(.*)$/); + if (match) { + counter++; + findings.push(line); + return `${match[1]}**F${counter} ${match[2]}:**${match[3]}`; + } + return line; + }).join("\n"); + + return { numbered, findings }; +} + +/** Parse DISMISS markers from agent text. Returns map of F# → reason. */ +export function parseDismissals(text: string): Map { + const dismissals = new Map(); + // Match: DISMISS F1: reason or DISMISS F1 - reason or DISMISS F1 reason + const pattern = /DISMISS\s+F(\d+)\s*[:–\-]\s*(.+)/gi; + let match; + while ((match = pattern.exec(text)) !== null) { + dismissals.set(parseInt(match[1], 10), match[2].trim()); + } + return dismissals; +} + +/** Filter suppressed findings from review text. Returns filtered text or null if all suppressed. */ +export function filterSuppressed(text: string, suppressed: Set): string | null { + if (suppressed.size === 0) return text; + + const lines = text.split("\n"); + const filtered = lines.filter(line => { + const match = line.match(/^\s*-\s*\*\*\w+:\*\*/); + if (!match) return true; // keep non-finding lines + const key = findingKey(line); + return !suppressed.has(key); + }); + + // If all findings were suppressed, return null (should be LGTM) + const remaining = filtered.filter(l => l.match(/^\s*-\s*\*\*/)); + if (remaining.length === 0) return null; + + return filtered.join("\n"); +} + +/** + * Dismiss tracker — stores dismissed findings across review loops. + * Scoped to one orchestrator instance (one session). + */ +export class DismissTracker { + private dismissed = new Map(); + private lastFindings: string[] = []; + + /** Record the findings from the latest review (for F# → finding mapping). */ + setLastFindings(findings: string[]): void { + this.lastFindings = findings; + } + + /** Process agent's response text for DISMISS markers. */ + processDismissals(agentText: string): number { + const markers = parseDismissals(agentText); + if (markers.size === 0) return 0; + + let count = 0; + for (const [fNum, reason] of markers) { + const finding = this.lastFindings[fNum - 1]; // F1 = index 0 + if (!finding) continue; + + const key = findingKey(finding); + const existing = this.dismissed.get(key); + if (existing) { + existing.count++; + existing.reason = reason; + } else { + this.dismissed.set(key, { key, reason, count: 1 }); + } + count++; + log(`dismiss: F${fNum} dismissed (${key}) — "${reason}" [count=${this.dismissed.get(key)!.count}]`); + } + return count; + } + + /** Get the set of finding keys that should be suppressed (dismissed >= threshold). */ + getSuppressed(): Set { + const suppressed = new Set(); + for (const [key, entry] of this.dismissed) { + if (entry.count >= SUPPRESS_THRESHOLD) { + suppressed.add(key); + } + } + return suppressed; + } + + /** Reset all dismissals (e.g. on session end). */ + reset(): void { + this.dismissed.clear(); + this.lastFindings = []; + } +} diff --git a/index.ts b/index.ts index d62da0a..d92d980 100644 --- a/index.ts +++ b/index.ts @@ -615,10 +615,18 @@ export default function (pi: ExtensionAPI) { const hasArchitectStep = Boolean(outcome.architect); const hasArchitectFailure = Boolean(outcome.architectFailure); const hasFollowUp = hasArchitectStep || hasArchitectFailure; + // Apply dismiss filtering (suppress previously dismissed findings, number remaining) + const filteredResult = orchestrator.applyDismissFiltering(outcome.senior.result); + + // If dismiss filtering converted issues to LGTM, clear the issues state + if (filteredResult.isLgtm && !outcome.senior.result.isLgtm) { + orchestrator.clearIssuesAfterDismiss(); + } + // Always trigger a turn for ISSUES_FOUND so agent can fix. // Also trigger for LGTM so agent can continue (push, etc.). // Skip triggering only when architect (success or failure) follows — it sends its own message. - sendReviewResult(pi, outcome.senior.result, outcome.senior.label ?? "", { + sendReviewResult(pi, filteredResult, outcome.senior.label ?? "", { showLoopCount: outcome.senior.loopInfo, reviewedFiles: outcome.files, triggerTurn: !hasFollowUp, @@ -696,6 +704,15 @@ export default function (pi: ExtensionAPI) { return; } + // Process DISMISS markers from agent's response (before running review) + if (lastAssistant) { + const textParts = (lastAssistant.content ?? []).filter((b: any) => b.type === "text").map((b: any) => b.text); + const agentText = textParts.join("\n"); + if (agentText) { + orchestrator.processDismissals(agentText); + } + } + if (!orchestrator.isEnabled) { // Keep tracking state (modifiedFiles, agentToolCalls) so we can // offer to review when the user toggles review back on. diff --git a/message-sender.ts b/message-sender.ts index 76ce898..e1b7145 100644 --- a/message-sender.ts +++ b/message-sender.ts @@ -74,7 +74,7 @@ export function sendReviewResult( pi.sendMessage( { customType: "code-review", - content: `🔍 **Automated Code Review**${loopInfo || (label ? ` (${label})` : "")} — ${duration}\n\nA separate reviewer examined your recent changes and found potential issues:\n\n${result.text}${fileList}${idFooter}\n\nPlease review these findings. If any are valid, fix them. If they're false positives, briefly explain why and move on.\n\n⚠️ **Do NOT push to remote yet.** Fix any issues first. Do NOT push after fixing either — a new review cycle will check your fixes automatically.`, + content: `🔍 **Automated Code Review**${loopInfo || (label ? ` (${label})` : "")} — ${duration}\n\nA separate reviewer examined your recent changes and found potential issues:\n\n${result.text}${fileList}${idFooter}\n\nPlease review these findings. If any are valid, fix them. If they're false positives, reply with \`DISMISS F#: reason\` (e.g. \`DISMISS F1: intentional design\`) and move on.\n\n⚠️ **Do NOT push to remote yet.** Fix any issues first. Do NOT push after fixing either — a new review cycle will check your fixes automatically.`, display: true, }, { triggerTurn: opts?.triggerTurn ?? true, deliverAs: "followUp" }, diff --git a/orchestrator.ts b/orchestrator.ts index c36d53f..9a19e5b 100644 --- a/orchestrator.ts +++ b/orchestrator.ts @@ -10,6 +10,7 @@ import type { AutoReviewSettings } from "./settings"; import { runArchitectReview, shouldRunArchitectReview } from "./architect"; import type { ReviewResult, ReviewRunner } from "./reviewer"; import { log } from "./logger"; +import { DismissTracker, numberFindings, filterSuppressed } from "./dismiss"; const MIN_REVIEW_CONTENT_LENGTH = 50; @@ -110,6 +111,7 @@ export class ReviewOrchestrator { private sessionChangedFiles = new Set(); private sessionHasGitContent = false; private lastReviewHadIssues = false; + private readonly dismissTracker = new DismissTracker(); constructor(opts: ReviewOrchestratorOptions) { this.runner = opts.runner; @@ -148,6 +150,35 @@ export class ReviewOrchestrator { this.isReviewingValue = false; this.resetCycleState(); this.lastReviewHadIssues = false; + this.dismissTracker.reset(); + } + + /** Process agent's response text for DISMISS markers. Call before review. */ + processDismissals(agentText: string): number { + return this.dismissTracker.processDismissals(agentText); + } + + /** Apply dismiss filtering + numbering to review result text. */ + applyDismissFiltering(result: ReviewResult): ReviewResult { + const suppressed = this.dismissTracker.getSuppressed(); + const filtered = filterSuppressed(result.text, suppressed); + + // All findings suppressed — treat as LGTM + if (filtered === null) { + log("dismiss: all findings suppressed — treating as LGTM"); + return { ...result, isLgtm: true, text: "No issues found (previously dismissed findings suppressed)." }; + } + + // Number remaining findings and track them + const { numbered, findings } = numberFindings(filtered); + this.dismissTracker.setLastFindings(findings); + return { ...result, text: numbered }; + } + + /** Clear issue state after dismiss filtering converts to LGTM. */ + clearIssuesAfterDismiss(): void { + this.lastReviewHadIssues = false; + this.loopCount = 0; } cancel(): void { diff --git a/package.json b/package.json index b1c52ec..5780501 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@inceptionstack/pi-hard-no", - "version": "1.1.0", + "version": "1.2.0", "type": "module", "description": "Pi extension — automatic code review after every agent turn", "license": "MIT",