Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions dismiss.test.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import { describe, it, expect } from "vitest";
import { findingKey, numberFindings, parseDismissals, filterSuppressed, DismissTracker } from "./dismiss";
import {
findingKey,
numberFindings,
parseDismissals,
filterSuppressed,
DismissTracker,
} from "./dismiss";

describe("findingKey", () => {
it("extracts severity + location from finding line", () => {
const line = '- **Medium:** src/gateway/model.ts:97 — chatId extraction uses wrong index';
const line = "- **Medium:** src/gateway/model.ts:97 — chatId extraction uses wrong index";
const key = findingKey(line);
expect(key).toContain("medium:src/gateway/model.ts:97");
});

it("handles numbered finding format (F# prefix)", () => {
const line = '- **F1 Medium:** src/foo.ts:10 — something bad';
const line = "- **F1 Medium:** src/foo.ts:10 — something bad";
const key = findingKey(line);
expect(key).toBe("medium:src/foo.ts:10 — something bad");
});
Expand All @@ -22,7 +28,7 @@ describe("findingKey", () => {

describe("numberFindings", () => {
it("numbers finding bullets sequentially", () => {
const text = '- **High:** foo.ts:1 — bug\n- **Low:** bar.ts:2 — nit\nSome other text';
const text = "- **High:** foo.ts:1 — bug\n- **Low:** bar.ts:2 — nit\nSome other text";
const { numbered, findings } = numberFindings(text);
expect(numbered).toContain("**F1 High:**");
expect(numbered).toContain("**F2 Low:**");
Expand All @@ -31,7 +37,7 @@ describe("numberFindings", () => {
});

it("preserves non-finding lines unchanged", () => {
const text = 'Header\n\n- **Medium:** x.ts:5 — issue\n\nFooter';
const text = "Header\n\n- **Medium:** x.ts:5 — issue\n\nFooter";
const { numbered } = numberFindings(text);
expect(numbered).toContain("Header");
expect(numbered).toContain("Footer");
Expand All @@ -41,7 +47,8 @@ describe("numberFindings", () => {

describe("parseDismissals", () => {
it("parses DISMISS F# with colon separator", () => {
const text = "The chatId extraction is intentional.\nDISMISS F1: intentional design for telegram thread format";
const text =
"The chatId extraction is intentional.\nDISMISS F1: intentional design for telegram thread format";
const dismissals = parseDismissals(text);
expect(dismissals.size).toBe(1);
expect(dismissals.get(1)).toBe("intentional design for telegram thread format");
Expand Down Expand Up @@ -69,29 +76,29 @@ describe("parseDismissals", () => {

describe("filterSuppressed", () => {
it("removes suppressed findings", () => {
const text = '- **High:** foo.ts:1 — bug one\n- **Low:** bar.ts:2 — nit two';
const suppressed = new Set([findingKey('- **High:** foo.ts:1 — bug one')]);
const text = "- **High:** foo.ts:1 — bug one\n- **Low:** bar.ts:2 — nit two";
const suppressed = new Set([findingKey("- **High:** foo.ts:1 — bug one")]);
const result = filterSuppressed(text, suppressed);
expect(result).not.toContain("bug one");
expect(result).toContain("nit two");
});

it("returns null when all findings suppressed", () => {
const text = '- **High:** foo.ts:1 — bug one';
const suppressed = new Set([findingKey('- **High:** foo.ts:1 — bug one')]);
const text = "- **High:** foo.ts:1 — bug one";
const suppressed = new Set([findingKey("- **High:** foo.ts:1 — bug one")]);
expect(filterSuppressed(text, suppressed)).toBeNull();
});

it("returns original when no suppressions", () => {
const text = '- **Low:** x.ts:5 — something';
const text = "- **Low:** x.ts:5 — something";
expect(filterSuppressed(text, new Set())).toBe(text);
});
});

describe("DismissTracker", () => {
it("tracks dismissals and suppresses after threshold", () => {
const tracker = new DismissTracker();
const findings = ['- **Medium:** src/foo.ts:10 — bad pattern', '- **Low:** src/bar.ts:5 — nit'];
const findings = ["- **Medium:** src/foo.ts:10 — bad pattern", "- **Low:** src/bar.ts:5 — nit"];
tracker.setLastFindings(findings);

// First dismiss
Expand All @@ -106,7 +113,7 @@ describe("DismissTracker", () => {

it("reset clears all state", () => {
const tracker = new DismissTracker();
tracker.setLastFindings(['- **High:** x.ts:1 — bug']);
tracker.setLastFindings(["- **High:** x.ts:1 — bug"]);
tracker.processDismissals("DISMISS F1: nope");
tracker.processDismissals("DISMISS F1: nope again");
expect(tracker.getSuppressed().size).toBe(1);
Expand Down
32 changes: 18 additions & 14 deletions dismiss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ export function numberFindings(text: string): { numbered: string; findings: stri
const findings: string[] = [];
let counter = 0;

const numbered = lines.map(line => {
// Match finding bullets: - **Severity:** ...
const match = line.match(/^(\s*-\s*)\*\*(\w+):\*\*(.*)$/);
if (match) {
counter++;
findings.push(line);
return `${match[1]}**F${counter} ${match[2]}:**${match[3]}`;
}
return line;
}).join("\n");
const numbered = lines
.map((line) => {
// Match finding bullets: - **Severity:** ...
const match = line.match(/^(\s*-\s*)\*\*(\w+):\*\*(.*)$/);
if (match) {
counter++;
findings.push(line);
return `${match[1]}**F${counter} ${match[2]}:**${match[3]}`;
}
return line;
})
.join("\n");

return { numbered, findings };
}
Expand All @@ -53,7 +55,7 @@ export function numberFindings(text: string): { numbered: string; findings: stri
export function parseDismissals(text: string): Map<number, string> {
const dismissals = new Map<number, string>();
// Match: DISMISS F1: reason or DISMISS F1 - reason or DISMISS F1 reason
const pattern = /DISMISS\s+F(\d+)\s*[:–\-]\s*(.+)/gi;
const pattern = /DISMISS\s+F(\d+)\s*[:–-]\s*(.+)/gi;
let match;
while ((match = pattern.exec(text)) !== null) {
dismissals.set(parseInt(match[1], 10), match[2].trim());
Expand All @@ -66,15 +68,15 @@ export function filterSuppressed(text: string, suppressed: Set<string>): string
if (suppressed.size === 0) return text;

const lines = text.split("\n");
const filtered = lines.filter(line => {
const filtered = lines.filter((line) => {
const match = line.match(/^\s*-\s*\*\*\w+:\*\*/);
if (!match) return true; // keep non-finding lines
const key = findingKey(line);
return !suppressed.has(key);
});

// If all findings were suppressed, return null (should be LGTM)
const remaining = filtered.filter(l => l.match(/^\s*-\s*\*\*/));
const remaining = filtered.filter((l) => l.match(/^\s*-\s*\*\*/));
if (remaining.length === 0) return null;

return filtered.join("\n");
Expand Down Expand Up @@ -112,7 +114,9 @@ export class DismissTracker {
this.dismissed.set(key, { key, reason, count: 1 });
}
count++;
log(`dismiss: F${fNum} dismissed (${key}) — "${reason}" [count=${this.dismissed.get(key)!.count}]`);
log(
`dismiss: F${fNum} dismissed (${key}) — "${reason}" [count=${this.dismissed.get(key)!.count}]`,
);
}
return count;
}
Expand Down
4 changes: 3 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,9 @@ export default function (pi: ExtensionAPI) {

// Process DISMISS markers from agent's response (before running review)
if (lastAssistant) {
const textParts = (lastAssistant.content ?? []).filter((b: any) => b.type === "text").map((b: any) => b.text);
const textParts = (lastAssistant.content ?? [])
.filter((b: any) => b.type === "text")
.map((b: any) => b.text);
const agentText = textParts.join("\n");
if (agentText) {
orchestrator.processDismissals(agentText);
Expand Down
47 changes: 47 additions & 0 deletions judge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ TAXONOMY (authoritative):
- npm/pnpm/yarn/pip/cargo install, make, cargo build, npm run format, codegen scripts → modifying
- kill/pkill/systemctl, docker run, docker compose up → modifying
- sed -i, perl -pi → modifying (in-place edit)
- perl -e, python -c, node -e, ruby -e (one-liners) → unsure (static analysis cannot determine side effects; caught by detectSubprocessWrapper pre-check)
- ./script.sh or npm run <unknown> → unsure unless clearly read-only
- truncated command (e.g. "git commi") → unsure
- Compound commands with &&, ;, ||, pipes, subshells: ANY modifying part → modifying; ANY unknown/truncated → unsure; otherwise the class of the safest-subset.
Expand Down Expand Up @@ -101,16 +102,62 @@ export function parseJudgeResponse(raw: string): BashClassification {
return "unsure";
}

/**
* Detect subprocess wrappers (perl -e, python -c, node -e, ruby -e) that cannot
* be statically analyzed. Returns 'unsure' for these patterns (conservative).
*
* Note: False-positives are acceptable here (fail-safe direction). Regexes match
* broadly to catch unquoted/variable forms (perl -e $code) and even literal strings
* in other commands (echo "perl -e foo"), since marking those 'unsure' is
* safer than missing an actual one-liner.
*/
function detectSubprocessWrapper(command: string): BashClassification | null {
if (!command) return null;

// perl -e / -E with any argument (quoted, unquoted, or variable)
if (/\bperl\b.*\s-[eE](?:\s+\S|$)/.test(command)) {
return "unsure";
}

// python / python3 -c with any argument
if (/\bpython\d?\b.*\s-c(?:\s+\S|$)/.test(command)) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Detect no-space -e/-c one-liners in wrapper pre-check

The wrapper regexes require whitespace after the flag via (?:\s+\S|$), so valid no-space forms like python -c'print(1)', ruby -e'puts 1', and perl -e'print 1' are missed by the deterministic guard. Those commands then fall through to the LLM classifier, which can return inspection_vcs_noop and incorrectly suppress review for code that may mutate files. Since the goal of this pre-check is to fail safe on opaque one-liners, these common shell forms should also be matched.

Useful? React with 👍 / 👎.

return "unsure";
}

// node -e / -E / --eval with any argument
if (/\bnode\b.*(?:\s-[eE]|--eval)(?:\s+\S|$)/.test(command)) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Match node --eval= in subprocess wrapper pre-check

The new Node wrapper regex misses the canonical long-option form --eval=...: node --help documents -e, --eval=..., but this pattern only matches when --eval is followed by whitespace or end-of-command. As a result, commands like node --eval=require('fs').writeFileSync(...) bypass the deterministic unsure guard and fall through to the LLM path, which can incorrectly suppress review if it returns inspection_vcs_noop; at minimum, this defeats the intended hard fail-safe for opaque one-liners.

Useful? React with 👍 / 👎.

return "unsure";
}

// ruby -e with any argument
if (/\bruby\b.*\s-e(?:\s+\S|$)/.test(command)) {
return "unsure";
}

return null;
}

/**
* Run the judge on a single bash command. Always resolves (never rejects);
* any failure collapses to `unsure` so the caller's skip logic stays safe.
*
* Pre-checks for subprocess wrappers (perl -e, python -c, etc.) before calling
* the LLM, since these patterns cannot be statically analyzed.
*/
export async function classifyBashCommand(
runner: JudgeRunner,
command: string,
opts: JudgeOptions,
): Promise<BashClassification> {
if (!command || typeof command !== "string") return "unsure";

// Deterministic pre-check: subprocess wrappers
const subprocessResult = detectSubprocessWrapper(command);
if (subprocessResult) {
log(`judge: detected subprocess wrapper (${command.slice(0, 40)}...) → unsure`);
return subprocessResult;
}

try {
const { text } = await runner(command, opts);
return parseJudgeResponse(text);
Expand Down
6 changes: 5 additions & 1 deletion orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,11 @@ export class ReviewOrchestrator {
// All findings suppressed — treat as LGTM
if (filtered === null) {
log("dismiss: all findings suppressed — treating as LGTM");
return { ...result, isLgtm: true, text: "No issues found (previously dismissed findings suppressed)." };
return {
...result,
isLgtm: true,
text: "No issues found (previously dismissed findings suppressed).",
};
}

// Number remaining findings and track them
Expand Down
112 changes: 112 additions & 0 deletions test/judge.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,115 @@ describe("classifyBashCommand", () => {
expect(await classifyBashCommand(runner, "something", fakeOpts)).toBe("unsure");
});
});

describe("classifyBashCommand - subprocess wrapper detection", () => {
const fakeOpts = { signal: AbortSignal.timeout(5000), cwd: "/tmp", model: "test/model" };

it("returns unsure for perl -e without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, "perl -e 'print 1'", fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("returns unsure for python -c without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, 'python -c "print(1)"', fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("returns unsure for python3 -c without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, 'python3 -c "print(1)"', fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("returns unsure for node -e without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, 'node -e "console.log(1)"', fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("returns unsure for node --eval without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, 'node --eval "console.log(1)"', fakeOpts)).toBe(
"unsure",
);
expect(called).toBe(0);
});

it("returns unsure for ruby -e without calling the runner", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, 'ruby -e "puts 1"', fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("calls the runner for non-wrapper commands", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, "git status", fakeOpts)).toBe("inspection_vcs_noop");
expect(called).toBe(1);
});
});

describe("classifyBashCommand - subprocess wrapper edge cases", () => {
const fakeOpts = { signal: AbortSignal.timeout(5000), cwd: "/tmp", model: "test/model" };

it("returns unsure for perl -e without quotes (unquoted form)", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, "perl -e some_code", fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("returns unsure for python -c with variable expansion", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
expect(await classifyBashCommand(runner, "python -c $payload", fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});

it("handles false-positive: literal perl string in echo (acceptable fail-safe)", async () => {
let called = 0;
const runner: JudgeRunner = async () => {
called++;
return { text: '{"classification":"inspection_vcs_noop"}' };
};
// This is a false-positive (echo doesn't execute perl), but it's acceptable
// because: (1) it's safe (echo is harmless), (2) fail-safe is better than missing real one-liners
expect(await classifyBashCommand(runner, 'echo "perl -e foo"', fakeOpts)).toBe("unsure");
expect(called).toBe(0);
});
});
Loading