From 49a967cb944ebc5f04f5fe07e639ad6babf3d6be Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:00:27 -0800
Subject: [PATCH 01/33] remove P2/P3 suppression instruction

---
 src/create-prompt/templates/review-prompt.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 418d264..831839e 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -134,8 +134,8 @@ Commenting rules:
 Submission:
 - Do not submit inline comments when:
   - the PR appears formatting-only, or
-  - all findings are low-severity (P2/P3), or
   - you cannot anchor a high-confidence issue to a specific changed line.
+- Submit all findings P0-P3 that meet the bug detection criteria. Low-severity issues (P2/P3) should still be submitted if they would cause runtime errors, incorrect behavior, or security vulnerabilities.
 - Do not escalate style/formatting into P0/P1 just to justify leaving an inline comment.
 - If no issues are found and a prior "no issues" comment from this bot already exists, skip submitting another comment to avoid redundancy.
 - If no issues are found and no prior "no issues" comment exists, post a single brief top-level summary noting no issues.

From 23e8f1f6f8a9ed9d0808fb5d6ab34bcbf02c6dd8 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:02:07 -0800
Subject: [PATCH 02/33] add import verification step

---
 src/create-prompt/templates/review-prompt.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 831839e..859beb5 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -113,6 +113,11 @@ Cross-reference capability:
 - If a test references a constant or path, verify it matches the production code's actual behavior.
 - For any suspicious pattern, search the codebase to confirm your understanding before flagging an issue.
 
+Import verification:
+- For new imports added in the diff, use Grep to verify the imported symbol (class, function, constant) actually exists in the codebase or is a valid external package.
+- For imports from the same project, confirm the symbol is exported from the source file.
+- Flag any import that references a non-existent symbol as a bug (will cause ImportError/ModuleNotFoundError at runtime).
+
 Accuracy gates:
 - Base findings strictly on the current diff and repo context available via gh/MCP; avoid speculation.
 - If confidence is low, phrase a single concise clarifying question instead of asserting a bug.

From e422d6a2f2513cda3ac24bf4174ca70d046f6f99 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:09:42 -0800
Subject: [PATCH 03/33] Add structured analysis phases

---
 src/create-prompt/templates/review-prompt.ts | 35 +++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 859beb5..6724440 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -29,18 +29,29 @@ Objectives:
 3) Leave concise inline comments (1-2 sentences) on bugs introduced by the PR. You may also comment on unchanged lines if the PR's changes expose or trigger issues there—but explain the connection clearly.
 
 Procedure:
-- Run: gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews
-- Prefer reviewing the local git diff since the PR branch is already checked out:
-  - Ensure you have the base branch ref locally (fetch if needed).
-  - Find merge base between HEAD and the base branch.
-  - Run git diff from that merge base to HEAD to see exactly what would merge.
-  - Example:
-    - git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}
-    - MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})
-    - git diff $MERGE_BASE..HEAD
-- Use gh PR diff/file APIs only as a fallback when local git diff is not possible:
-  - gh pr diff ${prNumber} --repo ${repoFullName}
-  - gh api repos/${repoFullName}/pulls/${prNumber}/files --paginate --jq '.[] | {filename,patch,additions,deletions}'
+Follow these phases IN ORDER. Do not skip to submitting findings until you complete Phase 1.
+
+## Phase 1: Context Gathering (REQUIRED before making any findings)
+1. Check existing comments: gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews
+2. Get the full diff:
+   - git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}
+   - MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})
+   - git diff $MERGE_BASE..HEAD
+3. For EACH file in the diff, gather context:
+   - For new imports: Grep to verify the imported symbol exists
+   - For new/modified functions: Grep for callers to understand usage patterns
+   - For functions that process data: Read surrounding code to understand expected types
+4. Do NOT identify bugs yet - focus only on understanding the changes.
+
+## Phase 2: Issue Identification (ONLY AFTER Phase 1 is complete)
+1. Review ALL changed lines systematically - Do NOT stop after finding just a few issues
+2. For each potential issue:
+   - Verify with Grep/Read before flagging (do not speculate)
+   - Trace data flow to confirm the bug path exists
+   - Check if the pattern exists elsewhere in the codebase (may be intentional)
+3. Continue until you have reviewed every changed line in every file
+
+## Phase 3: Submit Review
 - Prefer github_inline_comment___create_inline_comment with side="RIGHT" to post inline findings on changed/added lines
 - Compute exact diff positions (path + position) for each issue; every substantive comment must be inline on the changed line (no new top-level issue comments).
 - Detect prior top-level "no issues" comments authored by this bot (e.g., "no issues", "No issues found", "LGTM", including emoji-prefixed variants).

From d0d4251c91f6233cebbc3c9d23141e55aa4d9d3e Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:34:44 -0800
Subject: [PATCH 04/33] Add Thorough Analysis Checklist

---
 src/create-prompt/templates/review-prompt.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 6724440..d0a476c 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -50,6 +50,12 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
    - Trace data flow to confirm the bug path exists
    - Check if the pattern exists elsewhere in the codebase (may be intentional)
 3. Continue until you have reviewed every changed line in every file
+4. Thorough analysis checklist (complete before moving to Phase 3):
+   - Do NOT submit after finding only 2-3 issues - a common failure mode is stopping too early
+   - Have you examined every modified function/method in each changed file?
+   - If the PR touches multiple files, have you analyzed interactions between changes?
+   - When you found a bug pattern, did you search for the same pattern elsewhere in the diff?
+   - For PRs >200 lines: explicitly confirm all sections have been reviewed
 
 ## Phase 3: Submit Review
 - Prefer github_inline_comment___create_inline_comment with side="RIGHT" to post inline findings on changed/added lines
@@ -76,9 +82,6 @@ Diff Side Selection (CRITICAL):
 - The 'line' parameter refers to the line number on the specified side of the diff  
 - Ensure the line numbers you use correspond to the side you choose;
 
-How Many Findings to Return:
-Output all findings that the original author would fix if they knew about it. If there is no finding that a person would definitely love to see and fix, prefer outputting no findings. Do not stop at the first qualifying finding. Continue until you've listed every qualifying finding.
-
 Key Guidelines for Bug Detection:
 Only flag an issue as a bug if:
 1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.

From 26c52521d35c0b26d0424ce3128669aa433615cc Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:39:23 -0800
Subject: [PATCH 05/33] consolidate instructions into structured phases and
 remove redundant sections

---
 src/create-prompt/templates/review-prompt.ts | 93 +++++---------------
 1 file changed, 24 insertions(+), 69 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index d0a476c..530bf74 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -50,7 +50,30 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
    - Trace data flow to confirm the bug path exists
    - Check if the pattern exists elsewhere in the codebase (may be intentional)
 3. Continue until you have reviewed every changed line in every file
-4. Thorough analysis checklist (complete before moving to Phase 3):
+4. Cross-reference checks:
+   - When reviewing tests, search for related constants and configurations
+   - Use Grep and Read tools to understand relationships between files—do not rely solely on diff output
+   - If a test references a constant or path, verify it matches the production code's actual behavior
+5. Import verification:
+   - For new imports, use Grep to verify the imported symbol actually exists in the codebase or is a valid external package
+   - Flag any import that references a non-existent symbol as a bug (will cause ImportError/ModuleNotFoundError at runtime)
+6. Accuracy gates:
+   - Only flag bugs introduced by this PR (not pre-existing issues)
+   - Base findings strictly on the current diff and repo context; avoid speculation
+   - If confidence is low, phrase a clarifying question instead of asserting a bug
+   - Never raise purely stylistic or preference-only concerns
+7. Deduplication: Never repeat or re-raise an issue previously highlighted by this bot on this PR
+8. Priority levels for findings:
+   - [P0] - Drop everything to fix. Blocking release/operations
+   - [P1] - Urgent. Should be addressed in next cycle
+   - [P2] - Normal. To be fixed eventually
+   - [P3] - Low. Nice to have
+9. Comment format for each finding:
+   - Structure: **[P0-P3] Clear title (≤ 80 chars, imperative mood)** followed by 1 paragraph explanation
+   - Be clear about why it's a bug and when/where it manifests
+   - Brief (1 paragraph max), code chunks max 3 lines
+   - Matter-of-fact tone, immediately graspable by the author
+10. Thorough analysis checklist (complete before moving to Phase 3):
    - Do NOT submit after finding only 2-3 issues - a common failure mode is stopping too early
    - Have you examined every modified function/method in each changed file?
    - If the PR touches multiple files, have you analyzed interactions between changes?
@@ -82,74 +105,6 @@ Diff Side Selection (CRITICAL):
 - The 'line' parameter refers to the line number on the specified side of the diff  
 - Ensure the line numbers you use correspond to the side you choose;
 
-Key Guidelines for Bug Detection:
-Only flag an issue as a bug if:
-1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.
-2. The bug is discrete and actionable (not a general issue).
-3. Fixing the bug does not demand a level of rigor not present in the rest of the codebase.
-4. The bug was introduced in the PR (pre-existing bugs should not be flagged).
-5. The author would likely fix the issue if made aware of it.
-6. The bug does not rely on unstated assumptions.
-7. Must identify provably affected code parts (not speculation).
-8. The bug is clearly not intentional.
-
-Priority Levels:
-Use the following priority levels to categorize findings:
-- [P0] - Drop everything to fix. Blocking release/operations
-- [P1] - Urgent. Should be addressed in next cycle
-- [P2] - Normal. To be fixed eventually
-- [P3] - Low. Nice to have
-
-Comment Guidelines:
-Your review comments should be:
-1. Clear about why the issue is a bug
-2. Appropriately communicate severity
-3. Brief - at most 1 paragraph
-4. Code chunks max 3 lines, wrapped in markdown
-5. Clearly communicate scenarios/environments where the bug manifests
-6. Matter-of-fact tone without being accusatory
-7. Immediately graspable by the original author
-8. Avoid excessive flattery
-
-Output Format:
-Structure each inline comment as:
-**[P0-P3] Clear title (≤ 80 chars, imperative mood)**
-(blank line)
-Explanation of why this is a problem (1 paragraph max).
-
-In the review summary body (submitted via github_pr___submit_review), provide an overall assessment:
-- Whether the changes are correct or incorrect
-- 1-3 sentence overall explanation
-
-Cross-reference capability:
-- When reviewing tests, search for related constants and configurations (e.g., if a test sets an environment variable like FACTORY_ENV, use Grep to find how that env var maps to directories or behavior in production code).
-- Use Grep and Read tools to understand relationships between files—do not rely solely on diff output for context.
-- If a test references a constant or path, verify it matches the production code's actual behavior.
-- For any suspicious pattern, search the codebase to confirm your understanding before flagging an issue.
-
-Import verification:
-- For new imports added in the diff, use Grep to verify the imported symbol (class, function, constant) actually exists in the codebase or is a valid external package.
-- For imports from the same project, confirm the symbol is exported from the source file.
-- Flag any import that references a non-existent symbol as a bug (will cause ImportError/ModuleNotFoundError at runtime).
-
-Accuracy gates:
-- Base findings strictly on the current diff and repo context available via gh/MCP; avoid speculation.
-- If confidence is low, phrase a single concise clarifying question instead of asserting a bug.
-- Never raise purely stylistic or preference-only concerns.
-
-Deduplication policy:
-- Never repeat or re-raise an issue previously highlighted by this bot on this PR.
-- Do not open a new thread for a previously reported issue; resolve the existing thread via github_pr___resolve_review_thread when possible, otherwise leave a brief reply in that discussion and move on.
-
-Commenting rules:
-- One issue per comment.
-- Anchor findings to the relevant diff hunk so reviewers see the context immediately.
-- Focus on defects introduced or exposed by the PR's changes; if a new bug manifests on an unchanged line, you may post inline comments on those unchanged lines but clearly explain how the submitted changes trigger it.
-- Match the side parameter to the code segment you're referencing (default to RIGHT for new code) and provide line numbers from that same side
-- Keep comments concise and immediately graspable.
-- For low confidence findings, ask a question; for medium/high confidence, state the issue concretely.
-- Only include explicit code suggestions when you are absolutely certain the replacement is correct and safe.
-
 Submission:
 - Do not submit inline comments when:
   - the PR appears formatting-only, or

From 45cb5b50e56e300a0b2fea2cc8a35bc4807fc481 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 14:48:27 -0800
Subject: [PATCH 06/33] remove contradicting instructions about resolving
 existing threads

---
 src/create-prompt/templates/review-prompt.ts | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 530bf74..b88601c 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -24,7 +24,7 @@ Context:
 - The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
 
 Objectives:
-1) Re-check existing review comments and resolve threads when the issue is fixed (fall back to a brief "resolved" reply only if the thread cannot be marked resolved).
+1) Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (do NOT programmatically resolve threads).
 2) Review the PR diff and surface all bugs that meet the detection criteria below.
 3) Leave concise inline comments (1-2 sentences) on bugs introduced by the PR. You may also comment on unchanged lines if the PR's changes expose or trigger issues there—but explain the connection clearly.
 
@@ -86,17 +86,12 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
 - Detect prior top-level "no issues" comments authored by this bot (e.g., "no issues", "No issues found", "LGTM", including emoji-prefixed variants).
 - If the current run finds issues and prior "no issues" comments exist, delete them via gh api -X DELETE repos/${repoFullName}/issues/comments/<comment_id>; if deletion fails, minimize via GraphQL or reply "Superseded: issues were found in newer commits".
 - IMPORTANT: Do NOT delete comment ID ${context.droidCommentId} - this is the tracking comment for the current run.
-- Thread resolution rule (CRITICAL): NEVER resolve review threads.
-  - Do NOT call github_pr___resolve_review_thread under any circumstances.
-  - If a previously reported issue appears fixed, leave the thread unresolved.
-
 Preferred MCP tools (when available):
 - github_inline_comment___create_inline_comment to post inline feedback anchored to the diff
 - github_pr___submit_review to send inline review feedback
 - github_pr___delete_comment to remove outdated "no issues" comments
 - github_pr___minimize_comment when deletion is unavailable but minimization is acceptable
-- github_pr___reply_to_comment to acknowledge resolved threads
-- github_pr___resolve_review_thread to formally resolve threads once issues are fixed
+- github_pr___reply_to_comment to reply to existing threads (e.g., acknowledge fixed issues)
 
 Diff Side Selection (CRITICAL):  
 - When calling github_inline_comment___create_inline_comment, ALWAYS specify the 'side' parameter  

From 38af5d474465a092c7f0ca80e30181a3a88cd1f8 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 15:05:25 -0800
Subject: [PATCH 07/33] reorganize Phase 3, remove duplicates, fix diff
 position params

---
 src/create-prompt/templates/review-prompt.ts | 46 ++++++++------------
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index b88601c..c401874 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -81,35 +81,25 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
    - For PRs >200 lines: explicitly confirm all sections have been reviewed
 
 ## Phase 3: Submit Review
-- Prefer github_inline_comment___create_inline_comment with side="RIGHT" to post inline findings on changed/added lines
-- Compute exact diff positions (path + position) for each issue; every substantive comment must be inline on the changed line (no new top-level issue comments).
-- Detect prior top-level "no issues" comments authored by this bot (e.g., "no issues", "No issues found", "LGTM", including emoji-prefixed variants).
-- If the current run finds issues and prior "no issues" comments exist, delete them via gh api -X DELETE repos/${repoFullName}/issues/comments/<comment_id>; if deletion fails, minimize via GraphQL or reply "Superseded: issues were found in newer commits".
-- IMPORTANT: Do NOT delete comment ID ${context.droidCommentId} - this is the tracking comment for the current run.
-Preferred MCP tools (when available):
-- github_inline_comment___create_inline_comment to post inline feedback anchored to the diff
-- github_pr___submit_review to send inline review feedback
-- github_pr___delete_comment to remove outdated "no issues" comments
-- github_pr___minimize_comment when deletion is unavailable but minimization is acceptable
-- github_pr___reply_to_comment to reply to existing threads (e.g., acknowledge fixed issues)
+Submit findings from Phase 2 using the rules below.
 
-Diff Side Selection (CRITICAL):  
-- When calling github_inline_comment___create_inline_comment, ALWAYS specify the 'side' parameter  
-- Use side="RIGHT" for comments on NEW or MODIFIED code (what the PR adds/changes)  
-- Use side="LEFT" ONLY when commenting on code being REMOVED (only if you need to reference the old implementation)  
-- The 'line' parameter refers to the line number on the specified side of the diff  
-- Ensure the line numbers you use correspond to the side you choose;
+When NOT to submit:
+- PR appears formatting-only
+- Cannot anchor a high-confidence issue to a specific changed line
+- Do not escalate style/formatting into P0/P1 just to justify submitting
 
-Submission:
-- Do not submit inline comments when:
-  - the PR appears formatting-only, or
-  - you cannot anchor a high-confidence issue to a specific changed line.
-- Submit all findings P0-P3 that meet the bug detection criteria. Low-severity issues (P2/P3) should still be submitted if they would cause runtime errors, incorrect behavior, or security vulnerabilities.
-- Do not escalate style/formatting into P0/P1 just to justify leaving an inline comment.
-- If no issues are found and a prior "no issues" comment from this bot already exists, skip submitting another comment to avoid redundancy.
-- If no issues are found and no prior "no issues" comment exists, post a single brief top-level summary noting no issues.
-- If issues are found, delete/minimize/supersede any prior "no issues" comment before submitting.
-- Prefer github_inline_comment___create_inline_comment for inline findings and submit the overall review via github_pr___submit_review (fall back to gh api repos/${repoFullName}/pulls/${prNumber}/reviews -f event=COMMENT -f body="$SUMMARY" -f comments='[$COMMENTS_JSON]' when MCP tools are unavailable).
-- Do not approve or request changes; submit a comment-only review with inline feedback.
+Tools and format:
+- Use github_inline_comment___create_inline_comment for inline findings (path + side + line)
+- Use github_pr___submit_review to submit the overall review
+- Use github_pr___delete_comment or github_pr___minimize_comment for outdated comments
+- Use github_pr___reply_to_comment to reply to existing threads
+- Side selection: use RIGHT for new/modified code, LEFT only for removed code
+- Do not approve or request changes; submit comment-only reviews
+
+"No issues" handling:
+- If no issues found and a prior "no issues" comment exists: skip (avoid redundancy)
+- If no issues found and no prior comment exists: post a single brief summary
+- If issues found and prior "no issues" comment exists: delete/minimize it before submitting
+- Do NOT delete comment ID ${context.droidCommentId} (tracking comment for current run)
 `;
 }

From 2ff53b2704cc6c4deba50217c65c3e0a9e07fde3 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 15:38:57 -0800
Subject: [PATCH 08/33] Add robustness improvements to review prompt

---
 src/create-prompt/templates/review-prompt.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index c401874..4e969cf 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -54,6 +54,7 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
    - When reviewing tests, search for related constants and configurations
    - Use Grep and Read tools to understand relationships between files—do not rely solely on diff output
    - If a test references a constant or path, verify it matches the production code's actual behavior
+   - Example: if a test sets an env var, Grep where it is consumed to verify behavior matches production
 5. Import verification:
    - For new imports, use Grep to verify the imported symbol actually exists in the codebase or is a valid external package
    - Flag any import that references a non-existent symbol as a bug (will cause ImportError/ModuleNotFoundError at runtime)
@@ -79,6 +80,7 @@ Follow these phases IN ORDER. Do not skip to submitting findings until you compl
    - If the PR touches multiple files, have you analyzed interactions between changes?
    - When you found a bug pattern, did you search for the same pattern elsewhere in the diff?
    - For PRs >200 lines: explicitly confirm all sections have been reviewed
+   - Return all actionable findings discovered; do not stop after the first issues
 
 ## Phase 3: Submit Review
 Submit findings from Phase 2 using the rules below.
@@ -93,7 +95,8 @@ Tools and format:
 - Use github_pr___submit_review to submit the overall review
 - Use github_pr___delete_comment or github_pr___minimize_comment for outdated comments
 - Use github_pr___reply_to_comment to reply to existing threads
-- Side selection: use RIGHT for new/modified code, LEFT only for removed code
+- Side selection: use RIGHT for new/modified code, LEFT only for removed code. Line numbers must correspond to the chosen side.
+- Do not call github_pr___resolve_review_thread
 - Do not approve or request changes; submit comment-only reviews
 
 "No issues" handling:

From 49df74b7e87a01268f71732d6cc0276c42e24158 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 15:39:11 -0800
Subject: [PATCH 09/33] Consolidate inline comment tool guidance with
 multi-line support

---
 src/create-prompt/templates/review-prompt.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 4e969cf..cdd26e1 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -91,11 +91,10 @@ When NOT to submit:
 - Do not escalate style/formatting into P0/P1 just to justify submitting
 
 Tools and format:
-- Use github_inline_comment___create_inline_comment for inline findings (path + side + line)
+- Use the inline comment tool (github_inline_comment___create_inline_comment) to post inline findings. Anchor comments using path + side + line, where line is the PR diff line number on that side (RIGHT = new/modified code, LEFT = removed code). For multi-line comments, use startLine + line.
 - Use github_pr___submit_review to submit the overall review
 - Use github_pr___delete_comment or github_pr___minimize_comment for outdated comments
 - Use github_pr___reply_to_comment to reply to existing threads
-- Side selection: use RIGHT for new/modified code, LEFT only for removed code. Line numbers must correspond to the chosen side.
 - Do not call github_pr___resolve_review_thread
 - Do not approve or request changes; submit comment-only reviews
 

From b8d93014117e2f3f7824d579b762f34ec9594635 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 13 Jan 2026 16:58:01 -0800
Subject: [PATCH 10/33] Refine code review prompt to improve precision

---
 src/create-prompt/templates/review-prompt.ts | 256 ++++++++++++-------
 1 file changed, 170 insertions(+), 86 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index cdd26e1..a51341e 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -15,93 +15,177 @@ export function generateReviewPrompt(context: PreparedContext): string {
   return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}.
 The gh CLI is installed and authenticated via GH_TOKEN.
 
-Context:
-- Repo: ${repoFullName}
-- PR Number: ${prNumber}
-- PR Head Ref: ${headRefName}
-- PR Head SHA: ${headSha}
-- PR Base Ref: ${baseRefName}
-- The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
-
-Objectives:
-1) Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (do NOT programmatically resolve threads).
-2) Review the PR diff and surface all bugs that meet the detection criteria below.
-3) Leave concise inline comments (1-2 sentences) on bugs introduced by the PR. You may also comment on unchanged lines if the PR's changes expose or trigger issues there—but explain the connection clearly.
-
-Procedure:
-Follow these phases IN ORDER. Do not skip to submitting findings until you complete Phase 1.
-
-## Phase 1: Context Gathering (REQUIRED before making any findings)
-1. Check existing comments: gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews
-2. Get the full diff:
-   - git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}
-   - MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})
-   - git diff $MERGE_BASE..HEAD
-3. For EACH file in the diff, gather context:
-   - For new imports: Grep to verify the imported symbol exists
-   - For new/modified functions: Grep for callers to understand usage patterns
-   - For functions that process data: Read surrounding code to understand expected types
-4. Do NOT identify bugs yet - focus only on understanding the changes.
-
-## Phase 2: Issue Identification (ONLY AFTER Phase 1 is complete)
-1. Review ALL changed lines systematically - Do NOT stop after finding just a few issues
-2. For each potential issue:
-   - Verify with Grep/Read before flagging (do not speculate)
-   - Trace data flow to confirm the bug path exists
-   - Check if the pattern exists elsewhere in the codebase (may be intentional)
-3. Continue until you have reviewed every changed line in every file
-4. Cross-reference checks:
-   - When reviewing tests, search for related constants and configurations
-   - Use Grep and Read tools to understand relationships between files—do not rely solely on diff output
-   - If a test references a constant or path, verify it matches the production code's actual behavior
-   - Example: if a test sets an env var, Grep where it is consumed to verify behavior matches production
-5. Import verification:
-   - For new imports, use Grep to verify the imported symbol actually exists in the codebase or is a valid external package
-   - Flag any import that references a non-existent symbol as a bug (will cause ImportError/ModuleNotFoundError at runtime)
-6. Accuracy gates:
-   - Only flag bugs introduced by this PR (not pre-existing issues)
-   - Base findings strictly on the current diff and repo context; avoid speculation
-   - If confidence is low, phrase a clarifying question instead of asserting a bug
-   - Never raise purely stylistic or preference-only concerns
-7. Deduplication: Never repeat or re-raise an issue previously highlighted by this bot on this PR
-8. Priority levels for findings:
-   - [P0] - Drop everything to fix. Blocking release/operations
-   - [P1] - Urgent. Should be addressed in next cycle
-   - [P2] - Normal. To be fixed eventually
-   - [P3] - Low. Nice to have
-9. Comment format for each finding:
-   - Structure: **[P0-P3] Clear title (≤ 80 chars, imperative mood)** followed by 1 paragraph explanation
-   - Be clear about why it's a bug and when/where it manifests
-   - Brief (1 paragraph max), code chunks max 3 lines
-   - Matter-of-fact tone, immediately graspable by the author
-10. Thorough analysis checklist (complete before moving to Phase 3):
-   - Do NOT submit after finding only 2-3 issues - a common failure mode is stopping too early
-   - Have you examined every modified function/method in each changed file?
-   - If the PR touches multiple files, have you analyzed interactions between changes?
-   - When you found a bug pattern, did you search for the same pattern elsewhere in the diff?
-   - For PRs >200 lines: explicitly confirm all sections have been reviewed
-   - Return all actionable findings discovered; do not stop after the first issues
+### Context
+
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${headRefName}
+* PR Head SHA: ${headSha}
+* PR Base Ref: ${baseRefName}
+* The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
+
+---
+
+## Objectives
+
+1. Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (**do NOT programmatically resolve threads**).
+2. Review the PR diff and identify **high-confidence, actionable bugs** introduced by this PR.
+3. Leave concise **inline comments (1-2 sentences)** for qualifying bugs. You may comment on unchanged lines *only* if the PR clearly triggers the issue—explain the trigger path.
+
+---
+
+## Procedure
+
+Follow these phases **in order**. Do not submit findings until Phase 1 and Phase 2 are complete.
+
+---
+
+## Phase 1: Context Gathering (REQUIRED — do not report bugs yet)
+
+1. Inspect existing comments:
+   \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\`
+
+2. Compute the exact merge diff:
+
+   * \`git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}\`
+   * \`MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})\`
+   * \`git diff $MERGE_BASE..HEAD\`
+
+3. For **each file in the diff**, gather context:
+
+   * New imports → Grep to confirm the symbol exists
+   * New/modified functions → Grep for callers to understand usage
+   * Data-processing code → Read surrounding code to infer expected types
+
+4. Do **not** identify or report bugs yet. This phase is for understanding only.
+
+---
+
+## Phase 2: Issue Identification (ONLY after Phase 1)
+
+Review **every changed line**. You must complete the review even if you find issues early.
+
+### Analysis discipline
+
+* Verify with Grep/Read before flagging (no speculation)
+* Trace data flow to confirm a **real trigger path**
+* Check whether the pattern exists elsewhere (may be intentional)
+
+### Cross-reference checks
+
+* When reviewing tests, search for related constants, configs, or environment variables
+* Verify test assumptions match production behavior
+  *Example:* if a test sets an env var, Grep where it is consumed to confirm behavior matches prod
+
+### Import verification
+
+* Any import referencing a non-existent symbol is a bug (runtime ImportError)
+
+---
+
+## **Reporting Gate (CRITICAL)**
+
+Only report findings that meet **at least one** of the following:
+
+### Reportable bugs
+
+* **Definite runtime failures** (TypeError, KeyError, AttributeError, ImportError)
+* **Incorrect logic** with a clear trigger path and observable wrong result
+* **Security vulnerabilities** with a realistic exploit path
+* **Data corruption or loss**
+* **Breaking contract changes** (API / response / schema / validator behavior) where the contract is discoverable in code, tests, or docs
+
+### Do NOT report
+
+* Test code hygiene (unused vars, setup patterns) unless it causes test failure
+* Defensive "what-if" scenarios without a realistic trigger
+* Cosmetic issues (message text, naming, formatting)
+* Suggestions to "add guards," "add try/catch," or "be safer" without a concrete failure
+
+### Confidence rule
+
+* Prefer **DEFINITE** bugs over **POSSIBLE** bugs
+* Report POSSIBLE bugs **only** if you can identify a realistic execution path
+
+---
+
+## Targeted semantic passes (apply when relevant)
+
+* **API / validator / serializer changes**
+  Explicitly check for response-format or contract breakage
+  *(e.g., changed error response structure, removed or renamed fields, different status codes, altered required keys)*
+
+* **Auth / OAuth / session / state changes**
+  Check null-state handling, per-request randomness (state/nonce), and failure paths
+
+---
+
+## Deduplication
+
+* Never open a new finding for an issue previously reported by this bot on this PR
+* If an issue appears fixed, reply "resolved" in the existing thread
+
+---
+
+## Priority Levels
+
+* [P0] Blocking / crash / exploit
+* [P1] Urgent correctness or security issue
+* [P2] Real bug with limited impact
+* [P3] Minor but real bug
+
+---
+
+## Comment format
+
+Each inline comment must be:
+
+**[P0-P3] Clear imperative title (≤80 chars)**
+
+(blank line)
+
+One short paragraph explaining *why* this is a bug and *how* it manifests.
+
+* Max 1 paragraph
+* Code snippets ≤3 lines, Markdown fenced
+* Matter-of-fact, non-accusatory tone
+
+---
 
 ## Phase 3: Submit Review
-Submit findings from Phase 2 using the rules below.
-
-When NOT to submit:
-- PR appears formatting-only
-- Cannot anchor a high-confidence issue to a specific changed line
-- Do not escalate style/formatting into P0/P1 just to justify submitting
-
-Tools and format:
-- Use the inline comment tool (github_inline_comment___create_inline_comment) to post inline findings. Anchor comments using path + side + line, where line is the PR diff line number on that side (RIGHT = new/modified code, LEFT = removed code). For multi-line comments, use startLine + line.
-- Use github_pr___submit_review to submit the overall review
-- Use github_pr___delete_comment or github_pr___minimize_comment for outdated comments
-- Use github_pr___reply_to_comment to reply to existing threads
-- Do not call github_pr___resolve_review_thread
-- Do not approve or request changes; submit comment-only reviews
-
-"No issues" handling:
-- If no issues found and a prior "no issues" comment exists: skip (avoid redundancy)
-- If no issues found and no prior comment exists: post a single brief summary
-- If issues found and prior "no issues" comment exists: delete/minimize it before submitting
-- Do NOT delete comment ID ${context.droidCommentId} (tracking comment for current run)
+
+### When NOT to submit
+
+* PR is formatting-only
+* You cannot anchor a high-confidence issue to a specific changed line
+* All findings fail the Reporting Gate above
+
+### Tools & mechanics
+
+* Use \`github_inline_comment___create_inline_comment\`
+  * Anchor using **path + side + line**
+  * RIGHT = new/modified code, LEFT = removed code
+  * Line numbers must correspond to the chosen side
+* Use \`github_pr___submit_review\` for the summary
+* Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments
+* Use \`github_pr___reply_to_comment\` to acknowledge resolved issues
+* **Do NOT call** \`github_pr___resolve_review_thread\`
+* Do **not** approve or request changes
+
+### "No issues" handling
+
+* If no issues and a prior "no issues" comment exists → skip
+* If no issues and no prior comment exists → post a brief summary
+* If issues exist and a prior "no issues" comment exists → delete/minimize it
+* **Do NOT delete** comment ID ${context.droidCommentId}
+
+---
+
+## Review summary
+
+In the submitted review body:
+
+* State whether the changes are correct or incorrect
+* Provide a 1-3 sentence overall assessment
 `;
 }

From 37d99926970ea38f7fb706bd9a8b0bfd5850895c Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 14 Jan 2026 14:29:06 -0800
Subject: [PATCH 11/33] Use local /review prompt

---
 src/create-prompt/templates/review-prompt.ts | 211 ++++++-------------
 1 file changed, 70 insertions(+), 141 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index a51341e..be5df41 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -12,131 +12,56 @@ export function generateReviewPrompt(context: PreparedContext): string {
   const headSha = context.prBranchData?.headRefOid ?? "unknown";
   const baseRefName = context.eventData.baseBranch ?? "unknown";
 
-  return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}.
-The gh CLI is installed and authenticated via GH_TOKEN.
-
-### Context
-
-* Repo: ${repoFullName}
-* PR Number: ${prNumber}
-* PR Head Ref: ${headRefName}
-* PR Head SHA: ${headSha}
-* PR Base Ref: ${baseRefName}
-* The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
-
----
-
-## Objectives
-
-1. Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (**do NOT programmatically resolve threads**).
-2. Review the PR diff and identify **high-confidence, actionable bugs** introduced by this PR.
-3. Leave concise **inline comments (1-2 sentences)** for qualifying bugs. You may comment on unchanged lines *only* if the PR clearly triggers the issue—explain the trigger path.
-
----
-
-## Procedure
-
-Follow these phases **in order**. Do not submit findings until Phase 1 and Phase 2 are complete.
-
----
-
-## Phase 1: Context Gathering (REQUIRED — do not report bugs yet)
-
-1. Inspect existing comments:
-   \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\`
-
-2. Compute the exact merge diff:
-
-   * \`git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}\`
-   * \`MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})\`
-   * \`git diff $MERGE_BASE..HEAD\`
-
-3. For **each file in the diff**, gather context:
-
-   * New imports → Grep to confirm the symbol exists
-   * New/modified functions → Grep for callers to understand usage
-   * Data-processing code → Read surrounding code to infer expected types
-
-4. Do **not** identify or report bugs yet. This phase is for understanding only.
-
----
-
-## Phase 2: Issue Identification (ONLY after Phase 1)
-
-Review **every changed line**. You must complete the review even if you find issues early.
-
-### Analysis discipline
-
-* Verify with Grep/Read before flagging (no speculation)
-* Trace data flow to confirm a **real trigger path**
-* Check whether the pattern exists elsewhere (may be intentional)
-
-### Cross-reference checks
-
-* When reviewing tests, search for related constants, configs, or environment variables
-* Verify test assumptions match production behavior
-  *Example:* if a test sets an env var, Grep where it is consumed to confirm behavior matches prod
-
-### Import verification
-
-* Any import referencing a non-existent symbol is a bug (runtime ImportError)
-
----
-
-## **Reporting Gate (CRITICAL)**
-
-Only report findings that meet **at least one** of the following:
-
-### Reportable bugs
-
-* **Definite runtime failures** (TypeError, KeyError, AttributeError, ImportError)
-* **Incorrect logic** with a clear trigger path and observable wrong result
-* **Security vulnerabilities** with a realistic exploit path
-* **Data corruption or loss**
-* **Breaking contract changes** (API / response / schema / validator behavior) where the contract is discoverable in code, tests, or docs
-
-### Do NOT report
-
-* Test code hygiene (unused vars, setup patterns) unless it causes test failure
-* Defensive "what-if" scenarios without a realistic trigger
-* Cosmetic issues (message text, naming, formatting)
-* Suggestions to "add guards," "add try/catch," or "be safer" without a concrete failure
-
-### Confidence rule
-
-* Prefer **DEFINITE** bugs over **POSSIBLE** bugs
-* Report POSSIBLE bugs **only** if you can identify a realistic execution path
-
----
-
-## Targeted semantic passes (apply when relevant)
-
-* **API / validator / serializer changes**
-  Explicitly check for response-format or contract breakage
-  *(e.g., changed error response structure, removed or renamed fields, different status codes, altered required keys)*
-
-* **Auth / OAuth / session / state changes**
-  Check null-state handling, per-request randomness (state/nonce), and failure paths
-
----
-
-## Deduplication
-
-* Never open a new finding for an issue previously reported by this bot on this PR
-* If an issue appears fixed, reply "resolved" in the existing thread
-
----
+  return `# Review Guidelines
+  
+You are acting as a code reviewer for PR #${prNumber} in ${repoFullName}.
+
+## How Many Findings to Return
+
+Output all findings that the original author would fix if they knew about it. If there is no finding that a person would definitely love to see and fix, prefer outputting no findings. Do not stop at the first qualifying finding. Continue until you've listed every qualifying finding.
+
+## Key Guidelines for Bug Detection
+
+Only flag an issue as a bug if:
+1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.
+2. The bug is discrete and actionable (not a general issue).
+3. Fixing the bug does not demand a level of rigor not present in the rest of the codebase.
+4. The bug was introduced in the commit (pre-existing bugs should not be flagged).
+5. The author would likely fix the issue if made aware of it.
+6. The bug does not rely on unstated assumptions.
+7. Must identify provably affected code parts (not speculation).
+8. The bug is clearly not intentional.
+
+## Comment Guidelines
+
+Your review comments should be:
+1. Clear about why the issue is a bug
+2. Appropriately communicate severity
+3. Brief - at most 1 paragraph
+4. Code chunks max 3 lines, wrapped in markdown
+5. Clearly communicate scenarios/environments for bug
+6. Matter-of-fact tone without being accusatory
+7. Immediately graspable by original author
+8. Avoid excessive flattery
+
+## Additional Guidelines
+
+- Ignore trivial style unless it obscures meaning or violates documented standards.
+- Use one comment per distinct issue (or a multi-line range if necessary).
+- Use \`\`\`suggestion blocks ONLY for concrete replacement code (minimal lines; no commentary inside the block).
+- In every \`\`\`suggestion block, preserve the exact leading whitespace of the replaced lines.
+- Keep line ranges as short as possible (5-10 lines max).
 
 ## Priority Levels
 
-* [P0] Blocking / crash / exploit
-* [P1] Urgent correctness or security issue
-* [P2] Real bug with limited impact
-* [P3] Minor but real bug
+Use the following priority levels for your findings:
+- **[P0]** - Drop everything to fix. Blocking release/operations
+- **[P1]** - Urgent. Should be addressed in next cycle
+- **[P2]** - Normal. To be fixed eventually
+- **[P3]** - Low. Nice to have
 
----
 
-## Comment format
+## Output Format
 
 Each inline comment must be:
 
@@ -146,46 +71,50 @@ Each inline comment must be:
 
 One short paragraph explaining *why* this is a bug and *how* it manifests.
 
-* Max 1 paragraph
-* Code snippets ≤3 lines, Markdown fenced
-* Matter-of-fact, non-accusatory tone
 
----
 
-## Phase 3: Submit Review
+## PR Context
+
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${headRefName}
+* PR Head SHA: ${headSha}
+* PR Base Ref: ${baseRefName}
+* The PR branch has already been checked out
+* The gh CLI is installed and authenticated via GH_TOKEN
+
+## Review Process
+
+1. **Check existing comments**: Run \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\` to see existing review threads. If issues appear fixed, reply "resolved" to those threads.
+
+2. **Get the merge diff**: Start by finding the merge diff between the PR branch and the base branch, e.g. (\`git merge-base HEAD "$(git rev-parse --abbrev-ref "${baseRefName}@{upstream}")"\`), then run \`git diff\` against that SHA to see what changes would be merged into the base branch.
 
-### When NOT to submit
+3. **Review the changes**: Analyze every changed line for bugs introduced by this PR. Provide prioritized, actionable findings.
 
-* PR is formatting-only
-* You cannot anchor a high-confidence issue to a specific changed line
-* All findings fail the Reporting Gate above
+## Submitting Review
 
 ### Tools & mechanics
 
-* Use \`github_inline_comment___create_inline_comment\`
+* Use \`github_inline_comment___create_inline_comment\` to post inline comments
   * Anchor using **path + side + line**
   * RIGHT = new/modified code, LEFT = removed code
-  * Line numbers must correspond to the chosen side
-* Use \`github_pr___submit_review\` for the summary
-* Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments
+* Use \`github_pr___submit_review\` for the overall summary
 * Use \`github_pr___reply_to_comment\` to acknowledge resolved issues
+* Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments
 * **Do NOT call** \`github_pr___resolve_review_thread\`
-* Do **not** approve or request changes
+* **Do NOT delete** comment ID ${context.droidCommentId}
 
 ### "No issues" handling
 
-* If no issues and a prior "no issues" comment exists → skip
+* If no issues and a prior "no issues" comment exists → skip submitting
 * If no issues and no prior comment exists → post a brief summary
 * If issues exist and a prior "no issues" comment exists → delete/minimize it
-* **Do NOT delete** comment ID ${context.droidCommentId}
-
----
 
-## Review summary
+### Overall assessment
 
 In the submitted review body:
-
 * State whether the changes are correct or incorrect
-* Provide a 1-3 sentence overall assessment
+* Provide a 1-3 sentence overall explanation
+* Do **not** approve or request changes
 `;
 }

From ae1f3e4930b99024eb7ac8e0732a5fa54c9c929c Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 14 Jan 2026 16:46:01 -0800
Subject: [PATCH 12/33] Revert "Use local /review prompt"

This reverts commit 37d99926970ea38f7fb706bd9a8b0bfd5850895c.
---
 src/create-prompt/templates/review-prompt.ts | 211 +++++++++++++------
 1 file changed, 141 insertions(+), 70 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index be5df41..a51341e 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -12,56 +12,131 @@ export function generateReviewPrompt(context: PreparedContext): string {
   const headSha = context.prBranchData?.headRefOid ?? "unknown";
   const baseRefName = context.eventData.baseBranch ?? "unknown";
 
-  return `# Review Guidelines
-  
-You are acting as a code reviewer for PR #${prNumber} in ${repoFullName}.
-
-## How Many Findings to Return
-
-Output all findings that the original author would fix if they knew about it. If there is no finding that a person would definitely love to see and fix, prefer outputting no findings. Do not stop at the first qualifying finding. Continue until you've listed every qualifying finding.
-
-## Key Guidelines for Bug Detection
-
-Only flag an issue as a bug if:
-1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code.
-2. The bug is discrete and actionable (not a general issue).
-3. Fixing the bug does not demand a level of rigor not present in the rest of the codebase.
-4. The bug was introduced in the commit (pre-existing bugs should not be flagged).
-5. The author would likely fix the issue if made aware of it.
-6. The bug does not rely on unstated assumptions.
-7. Must identify provably affected code parts (not speculation).
-8. The bug is clearly not intentional.
-
-## Comment Guidelines
-
-Your review comments should be:
-1. Clear about why the issue is a bug
-2. Appropriately communicate severity
-3. Brief - at most 1 paragraph
-4. Code chunks max 3 lines, wrapped in markdown
-5. Clearly communicate scenarios/environments for bug
-6. Matter-of-fact tone without being accusatory
-7. Immediately graspable by original author
-8. Avoid excessive flattery
-
-## Additional Guidelines
-
-- Ignore trivial style unless it obscures meaning or violates documented standards.
-- Use one comment per distinct issue (or a multi-line range if necessary).
-- Use \`\`\`suggestion blocks ONLY for concrete replacement code (minimal lines; no commentary inside the block).
-- In every \`\`\`suggestion block, preserve the exact leading whitespace of the replaced lines.
-- Keep line ranges as short as possible (5-10 lines max).
+  return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}.
+The gh CLI is installed and authenticated via GH_TOKEN.
+
+### Context
+
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${headRefName}
+* PR Head SHA: ${headSha}
+* PR Base Ref: ${baseRefName}
+* The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
+
+---
+
+## Objectives
+
+1. Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (**do NOT programmatically resolve threads**).
+2. Review the PR diff and identify **high-confidence, actionable bugs** introduced by this PR.
+3. Leave concise **inline comments (1-2 sentences)** for qualifying bugs. You may comment on unchanged lines *only* if the PR clearly triggers the issue—explain the trigger path.
+
+---
+
+## Procedure
+
+Follow these phases **in order**. Do not submit findings until Phase 1 and Phase 2 are complete.
+
+---
+
+## Phase 1: Context Gathering (REQUIRED — do not report bugs yet)
+
+1. Inspect existing comments:
+   \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\`
+
+2. Compute the exact merge diff:
+
+   * \`git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}\`
+   * \`MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})\`
+   * \`git diff $MERGE_BASE..HEAD\`
+
+3. For **each file in the diff**, gather context:
+
+   * New imports → Grep to confirm the symbol exists
+   * New/modified functions → Grep for callers to understand usage
+   * Data-processing code → Read surrounding code to infer expected types
+
+4. Do **not** identify or report bugs yet. This phase is for understanding only.
+
+---
+
+## Phase 2: Issue Identification (ONLY after Phase 1)
+
+Review **every changed line**. You must complete the review even if you find issues early.
+
+### Analysis discipline
+
+* Verify with Grep/Read before flagging (no speculation)
+* Trace data flow to confirm a **real trigger path**
+* Check whether the pattern exists elsewhere (may be intentional)
+
+### Cross-reference checks
+
+* When reviewing tests, search for related constants, configs, or environment variables
+* Verify test assumptions match production behavior
+  *Example:* if a test sets an env var, Grep where it is consumed to confirm behavior matches prod
+
+### Import verification
+
+* Any import referencing a non-existent symbol is a bug (runtime ImportError)
+
+---
+
+## **Reporting Gate (CRITICAL)**
+
+Only report findings that meet **at least one** of the following:
+
+### Reportable bugs
+
+* **Definite runtime failures** (TypeError, KeyError, AttributeError, ImportError)
+* **Incorrect logic** with a clear trigger path and observable wrong result
+* **Security vulnerabilities** with a realistic exploit path
+* **Data corruption or loss**
+* **Breaking contract changes** (API / response / schema / validator behavior) where the contract is discoverable in code, tests, or docs
+
+### Do NOT report
+
+* Test code hygiene (unused vars, setup patterns) unless it causes test failure
+* Defensive "what-if" scenarios without a realistic trigger
+* Cosmetic issues (message text, naming, formatting)
+* Suggestions to "add guards," "add try/catch," or "be safer" without a concrete failure
+
+### Confidence rule
+
+* Prefer **DEFINITE** bugs over **POSSIBLE** bugs
+* Report POSSIBLE bugs **only** if you can identify a realistic execution path
+
+---
+
+## Targeted semantic passes (apply when relevant)
+
+* **API / validator / serializer changes**
+  Explicitly check for response-format or contract breakage
+  *(e.g., changed error response structure, removed or renamed fields, different status codes, altered required keys)*
+
+* **Auth / OAuth / session / state changes**
+  Check null-state handling, per-request randomness (state/nonce), and failure paths
+
+---
+
+## Deduplication
+
+* Never open a new finding for an issue previously reported by this bot on this PR
+* If an issue appears fixed, reply "resolved" in the existing thread
+
+---
 
 ## Priority Levels
 
-Use the following priority levels for your findings:
-- **[P0]** - Drop everything to fix. Blocking release/operations
-- **[P1]** - Urgent. Should be addressed in next cycle
-- **[P2]** - Normal. To be fixed eventually
-- **[P3]** - Low. Nice to have
+* [P0] Blocking / crash / exploit
+* [P1] Urgent correctness or security issue
+* [P2] Real bug with limited impact
+* [P3] Minor but real bug
 
+---
 
-## Output Format
+## Comment format
 
 Each inline comment must be:
 
@@ -71,50 +146,46 @@ Each inline comment must be:
 
 One short paragraph explaining *why* this is a bug and *how* it manifests.
 
+* Max 1 paragraph
+* Code snippets ≤3 lines, Markdown fenced
+* Matter-of-fact, non-accusatory tone
 
+---
 
-## PR Context
-
-* Repo: ${repoFullName}
-* PR Number: ${prNumber}
-* PR Head Ref: ${headRefName}
-* PR Head SHA: ${headSha}
-* PR Base Ref: ${baseRefName}
-* The PR branch has already been checked out
-* The gh CLI is installed and authenticated via GH_TOKEN
-
-## Review Process
-
-1. **Check existing comments**: Run \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\` to see existing review threads. If issues appear fixed, reply "resolved" to those threads.
-
-2. **Get the merge diff**: Start by finding the merge diff between the PR branch and the base branch, e.g. (\`git merge-base HEAD "$(git rev-parse --abbrev-ref "${baseRefName}@{upstream}")"\`), then run \`git diff\` against that SHA to see what changes would be merged into the base branch.
+## Phase 3: Submit Review
 
-3. **Review the changes**: Analyze every changed line for bugs introduced by this PR. Provide prioritized, actionable findings.
+### When NOT to submit
 
-## Submitting Review
+* PR is formatting-only
+* You cannot anchor a high-confidence issue to a specific changed line
+* All findings fail the Reporting Gate above
 
 ### Tools & mechanics
 
-* Use \`github_inline_comment___create_inline_comment\` to post inline comments
+* Use \`github_inline_comment___create_inline_comment\`
   * Anchor using **path + side + line**
   * RIGHT = new/modified code, LEFT = removed code
-* Use \`github_pr___submit_review\` for the overall summary
-* Use \`github_pr___reply_to_comment\` to acknowledge resolved issues
+  * Line numbers must correspond to the chosen side
+* Use \`github_pr___submit_review\` for the summary
 * Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments
+* Use \`github_pr___reply_to_comment\` to acknowledge resolved issues
 * **Do NOT call** \`github_pr___resolve_review_thread\`
-* **Do NOT delete** comment ID ${context.droidCommentId}
+* Do **not** approve or request changes
 
 ### "No issues" handling
 
-* If no issues and a prior "no issues" comment exists → skip submitting
+* If no issues and a prior "no issues" comment exists → skip
 * If no issues and no prior comment exists → post a brief summary
 * If issues exist and a prior "no issues" comment exists → delete/minimize it
+* **Do NOT delete** comment ID ${context.droidCommentId}
+
+---
 
-### Overall assessment
+## Review summary
 
 In the submitted review body:
+
 * State whether the changes are correct or incorrect
-* Provide a 1-3 sentence overall explanation
-* Do **not** approve or request changes
+* Provide a 1-3 sentence overall assessment
 `;
 }

From afc66a42122f22a20cf3fa66ec8b08572d37b33e Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 14 Jan 2026 17:04:01 -0800
Subject: [PATCH 13/33] add no pager to get full diff, exclude P2/P3 issues

---
 src/create-prompt/templates/review-prompt.ts  |  3 +-
 .../templates/review-prompt.test.ts           | 34 ++++++-------------
 test/integration/review-flow.test.ts          | 10 +++---
 3 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index a51341e..98fd279 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -49,7 +49,7 @@ Follow these phases **in order**. Do not submit findings until Phase 1 and Phase
 
    * \`git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}\`
    * \`MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})\`
-   * \`git diff $MERGE_BASE..HEAD\`
+   * \`git --no-pager diff $MERGE_BASE..HEAD\`
 
 3. For **each file in the diff**, gather context:
 
@@ -158,6 +158,7 @@ One short paragraph explaining *why* this is a bug and *how* it manifests.
 
 * PR is formatting-only
 * You cannot anchor a high-confidence issue to a specific changed line
+* All findings are low-severity (P2/P3)
 * All findings fail the Reporting Gate above
 
 ### Tools & mechanics
diff --git a/test/create-prompt/templates/review-prompt.test.ts b/test/create-prompt/templates/review-prompt.test.ts
index 3833ded..ec23288 100644
--- a/test/create-prompt/templates/review-prompt.test.ts
+++ b/test/create-prompt/templates/review-prompt.test.ts
@@ -27,43 +27,31 @@ describe("generateReviewPrompt", () => {
 
     const prompt = generateReviewPrompt(context);
 
-    expect(prompt).toContain("Objectives:");
+    expect(prompt).toContain("## Objectives");
     expect(prompt).toContain("Re-check existing review comments");
     expect(prompt).toContain("git merge-base");
-    expect(prompt).toContain("git diff");
-    expect(prompt).toContain(
-      "gh pr diff 42 --repo test-owner/test-repo",
-    );
-    expect(prompt).toContain("gh api repos/test-owner/test-repo/pulls/42/files");
+    expect(prompt).toContain("git --no-pager diff $MERGE_BASE..HEAD");
     expect(prompt).toContain("github_inline_comment___create_inline_comment");
-    expect(prompt).toContain("github_pr___resolve_review_thread");
-    expect(prompt).toContain("every substantive comment must be inline on the changed line");
+    expect(prompt).toContain("**Do NOT call** `github_pr___resolve_review_thread`");
   });
 
   it("emphasizes accuracy gates and bug detection guidelines", () => {
     const prompt = generateReviewPrompt(createBaseContext());
 
-    expect(prompt).toContain("How Many Findings to Return:");
-    expect(prompt).toContain("Output all findings that the original author would fix");
-    expect(prompt).toContain("Key Guidelines for Bug Detection:");
-    expect(prompt).toContain("Priority Levels:");
+    expect(prompt).toContain("## Priority Levels");
     expect(prompt).toContain("[P0]");
-    expect(prompt).toContain("Never raise purely stylistic");
-    expect(prompt).toContain("Never repeat or re-raise an issue previously highlighted");
+    expect(prompt).toContain(
+      "Never open a new finding for an issue previously reported by this bot",
+    );
   });
 
   it("describes submission guidance", () => {
     const prompt = generateReviewPrompt(createBaseContext());
 
-    expect(prompt).toContain("Prefer github_inline_comment___create_inline_comment");
-    expect(prompt).toContain("gh api repos/test-owner/test-repo/pulls/42/reviews");
-    expect(prompt).toContain("Do not approve or request changes");
+    expect(prompt).toContain("Use `github_inline_comment___create_inline_comment`");
+    expect(prompt).toContain("Do **not** approve or request changes");
     expect(prompt).toContain("github_pr___submit_review");
-    expect(prompt).toContain("github_pr___resolve_review_thread");
-    expect(prompt).toContain("skip submitting another comment to avoid redundancy");
-    expect(prompt).toContain("Do not submit inline comments");
-    expect(prompt).toContain(
-      "Do not escalate style/formatting into P0/P1 just to justify leaving an inline comment",
-    );
+    expect(prompt).toContain("### When NOT to submit");
+    expect(prompt).toContain("All findings are low-severity (P2/P3)");
   });
 });
diff --git a/test/integration/review-flow.test.ts b/test/integration/review-flow.test.ts
index 61bbf6e..6095115 100644
--- a/test/integration/review-flow.test.ts
+++ b/test/integration/review-flow.test.ts
@@ -146,13 +146,11 @@ describe("review command integration", () => {
 
     expect(prompt).toContain("You are performing an automated code review");
     expect(prompt).toContain("github_inline_comment___create_inline_comment");
-    expect(prompt).toContain("How Many Findings to Return:");
-    expect(prompt).toContain("Output all findings that the original author would fix");
-    expect(prompt).toContain("Key Guidelines for Bug Detection:");
-    expect(prompt).toContain("Priority Levels:");
+    expect(prompt).toContain("## **Reporting Gate (CRITICAL)**");
+    expect(prompt).toContain("## Priority Levels");
     expect(prompt).toContain("gh pr view 7 --repo test-owner/test-repo --json comments,reviews");
-    expect(prompt).toContain("every substantive comment must be inline on the changed line");
-    expect(prompt).toContain("github_pr___resolve_review_thread");
+    expect(prompt).toContain("git --no-pager diff $MERGE_BASE..HEAD");
+    expect(prompt).toContain("**Do NOT call** `github_pr___resolve_review_thread`");
 
     const droidArgsCall = setOutputSpy.mock.calls.find(
       (call: unknown[]) => call[0] === "droid_args",

From 0983d10fdb13f02fe16550b6091d236cbb99c4f7 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 14 Jan 2026 17:04:27 -0800
Subject: [PATCH 14/33] Add common analysis patterns for reviews

---
 src/create-prompt/templates/review-prompt.ts | 126 +++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 98fd279..819bb9f 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -83,6 +83,132 @@ Review **every changed line**. You must complete the review even if you find iss
 
 ---
 
+## Systematic Analysis Patterns
+
+Apply these patterns during your review. These target common bug classes:
+
+### Logic & Variable Usage
+
+* When reviewing conditionals, verify the correct variable is referenced in each clause
+* Check for AND vs OR confusion in permission/validation logic (especially security-critical paths)
+* Verify return statements return the intended value:
+  - Not wrapper objects when data is expected (e.g., \`safeParse()\` result vs \`.data\`)
+  - Not intermediate variables when final result is expected
+  - Not wrong object properties (e.g., \`obj.original\` vs \`obj.modified\`)
+* In loops or transformations, confirm variable names match semantic purpose
+* Flag operations where variable names suggest type mismatches (e.g., \`*Time\` used where \`*Date\` expected)
+
+### Null/Undefined Safety
+
+* For each property access chain (\`a.b.c\`), verify none of the intermediate values can be null/undefined/None:
+  - Check API responses that might return null
+  - Check database queries that might return no results
+  - Check array operations like \`array[0]\` or \`array.find()\`
+* When Optional types are unwrapped (\`.get()\`, \`!\`, non-null assertions), verify presence is checked first
+* In conditional branches, verify null handling exists for all code paths
+* Pay special attention to:
+  - Authentication/authorization contexts (user might not be authenticated)
+  - Optional relationships (foreign keys, joined data)
+  - Map/dictionary lookups
+  - Configuration values that might not be set
+
+### Type Compatibility & Data Flow
+
+* Trace what types flow into math operations:
+  - \`floor\`, \`ceil\`, \`round\`, modulo on datetime/string inputs cause errors
+  - Arithmetic operations on mixed types
+* Verify comparison operators match types:
+  - Object reference equality (\`===\`, \`==\`) on different instances always false
+  - Comparing object types when value comparison intended (e.g., dayjs objects)
+* Check function parameters receive expected types after transformations:
+  - Serialization/deserialization boundary crossings
+  - Database field type conversions
+  - API request/response parsing
+* Flag string operations on numbers or vice versa without explicit conversion
+* When data flows through multiple functions, verify type consistency at each step
+
+### Async/Await Patterns (JavaScript/TypeScript)
+
+* **CRITICAL**: Flag \`forEach\`/\`map\`/\`filter\` with async callbacks - these don't await:
+  \`\`\`javascript
+  // BUG: async callbacks are fire-and-forget
+  items.forEach(async (item) => await process(item));
+  
+  // CORRECT: use for...of or Promise.all
+  for (const item of items) await process(item);
+  \`\`\`
+* Verify all async function calls are awaited when their result or side-effect is needed:
+  - Database writes that must complete before continuing
+  - API calls whose responses are used
+  - File operations that affect subsequent code
+* Check promise chains have proper error handling (\`.catch()\` or try/catch)
+* Verify parallel operations use \`Promise.all\`/\`Promise.allSettled\` appropriately
+
+### Security Vulnerabilities
+
+* **SSRF (Server-Side Request Forgery)**:
+  - Flag unvalidated URL fetching: \`open(url)\`, \`fetch(user_input)\`, \`curl\` with user input
+  - Verify URL allowlists exist for external requests
+* **XSS (Cross-Site Scripting)**:
+  - Check for unescaped user input in HTML/template contexts
+  - Verify template engines auto-escape by default
+* **Authentication & Session State**:
+  - OAuth state/nonce must be per-request random, not static or reused
+  - CSRF tokens must be unpredictable and verified
+  - Session data must not leak between requests
+* **Input Validation Bypasses**:
+  - \`indexOf()\`/\`startsWith()\` for origin validation can be bypassed (use exact allowlist matching)
+  - Case sensitivity in security checks (email blacklists, domain checks)
+* **Timing Attacks**:
+  - Secret/token comparison should use constant-time comparison functions
+* **Cache Poisoning**:
+  - Verify security decisions (permissions, auth) aren't cached asymmetrically
+  - If grants are cached, denials must also be cached (or neither)
+
+### Concurrency Issues (when applicable)
+
+* Check if shared state is modified without synchronization:
+  - Global variables or class fields accessed by multiple threads/goroutines
+  - Database records that can be modified concurrently
+* Verify double-checked locking re-checks condition after acquiring lock:
+  \`\`\`go
+  // BUG: doesn't re-check after lock
+  if !initialized {
+      lock.Lock()
+      initialize()
+      lock.Unlock()
+  }
+  
+  // CORRECT: re-check after acquiring lock
+  if !initialized {
+      lock.Lock()
+      if !initialized {
+          initialize()
+      }
+      lock.Unlock()
+  }
+  \`\`\`
+* Flag non-atomic operations on shared counters:
+  - \`count = count + 1\` in concurrent code (use atomic operations)
+  - Read-modify-write without locks
+* Check that resources accessed by multiple threads have proper synchronization
+
+### API Contract & Breaking Changes
+
+* When serializers/validators/response formatters change:
+  - Use Grep to find API consumers and test files
+  - Verify response structure remains compatible (field names, types, nesting)
+  - Check for removed fields, changed types, or new required fields
+* When database models/schemas change:
+  - Verify migrations include data backfill for existing records
+  - Check that existing queries still work with new schema
+* When function signatures change:
+  - Grep for all callers to verify compatibility
+  - Check if return type changes break caller assumptions
+* Review test files for expected API shapes and verify changes match tests
+
+---
+
 ## **Reporting Gate (CRITICAL)**
 
 Only report findings that meet **at least one** of the following:

From cbbe3c1b130932fd62548f2aaa9a14c87296670f Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Thu, 15 Jan 2026 12:21:57 -0800
Subject: [PATCH 15/33] Pre-compute PR diff and comments, enforce thorough
 review with xhigh reasoning

---
 action.yml                                    |  12 --
 src/create-prompt/index.ts                    |  36 +++--
 src/create-prompt/templates/review-prompt.ts  |  47 +++++--
 src/create-prompt/types.ts                    |   6 +
 src/tag/commands/review.ts                    | 132 +++++++++++++++++-
 .../templates/review-prompt.test.ts           |  58 +++++++-
 test/modes/tag/review-command.test.ts         |  74 +++++++++-
 7 files changed, 318 insertions(+), 47 deletions(-)

diff --git a/action.yml b/action.yml
index f05b5fe..3125856 100644
--- a/action.yml
+++ b/action.yml
@@ -178,18 +178,6 @@ runs:
       env:
         EXPERIMENTAL_ALLOWED_DOMAINS: ${{ inputs.experimental_allowed_domains }}
 
-    - name: Checkout PR branch for review
-      if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.review_pr_number != ''
-      shell: bash
-      run: |
-        echo "Checking out PR #${{ steps.prepare.outputs.review_pr_number }} branch for full file access..."
-        # Reset any local changes from the merge commit to allow clean checkout
-        git reset --hard HEAD
-        gh pr checkout ${{ steps.prepare.outputs.review_pr_number }}
-        echo "Successfully checked out PR branch: $(git rev-parse --abbrev-ref HEAD)"
-      env:
-        GH_TOKEN: ${{ steps.prepare.outputs.github_token }}
-
     - name: Run Droid Exec
       id: droid
       if: steps.prepare.outputs.contains_trigger == 'true'
diff --git a/src/create-prompt/index.ts b/src/create-prompt/index.ts
index 511fd34..d87c90f 100644
--- a/src/create-prompt/index.ts
+++ b/src/create-prompt/index.ts
@@ -15,9 +15,14 @@ import {
   isPullRequestReviewCommentEvent,
 } from "../github/context";
 import type { ParsedGitHubContext } from "../github/context";
-import type { CommonFields, PreparedContext, EventData } from "./types";
+import type {
+  CommonFields,
+  PreparedContext,
+  EventData,
+  ReviewArtifacts,
+} from "./types";
 
-export type { CommonFields, PreparedContext } from "./types";
+export type { CommonFields, PreparedContext, ReviewArtifacts } from "./types";
 
 const BASE_ALLOWED_TOOLS = [
   "Execute",
@@ -70,6 +75,7 @@ export function prepareContext(
   baseBranch?: string,
   droidBranch?: string,
   prBranchData?: { headRefName: string; headRefOid: string },
+  reviewArtifacts?: ReviewArtifacts,
 ): PreparedContext {
   const repository = context.repository.full_name;
   const triggerPhrase = context.inputs.triggerPhrase || "@droid";
@@ -108,15 +114,12 @@ export function prepareContext(
     commonFields.droidBranch = droidBranch;
   }
 
-  const eventData = buildEventData(
-    context,
-    {
-      commentId,
-      commentBody,
-      baseBranch,
-      droidBranch,
-    },
-  );
+  const eventData = buildEventData(context, {
+    commentId,
+    commentBody,
+    baseBranch,
+    droidBranch,
+  });
 
   const result: PreparedContext = {
     ...commonFields,
@@ -128,6 +131,10 @@ export function prepareContext(
     result.prBranchData = prBranchData;
   }
 
+  if (reviewArtifacts) {
+    result.reviewArtifacts = reviewArtifacts;
+  }
+
   return result;
 }
 
@@ -282,9 +289,7 @@ function buildEventData(
   }
 }
 
-export type PromptGenerator = (
-  context: PreparedContext,
-) => string;
+export type PromptGenerator = (context: PreparedContext) => string;
 
 export type PromptCreationOptions = {
   githubContext: ParsedGitHubContext;
@@ -296,6 +301,7 @@ export type PromptCreationOptions = {
   allowedTools?: string[];
   disallowedTools?: string[];
   includeActionsTools?: boolean;
+  reviewArtifacts?: ReviewArtifacts;
 };
 
 export async function createPrompt({
@@ -308,6 +314,7 @@ export async function createPrompt({
   allowedTools = [],
   disallowedTools = [],
   includeActionsTools = false,
+  reviewArtifacts,
 }: PromptCreationOptions) {
   try {
     const droidCommentId = commentId.toString();
@@ -317,6 +324,7 @@ export async function createPrompt({
       baseBranch,
       droidBranch,
       prBranchData,
+      reviewArtifacts,
     );
 
     await mkdir(`${process.env.RUNNER_TEMP || "/tmp"}/droid-prompts`, {
diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts
index 819bb9f..044cb44 100644
--- a/src/create-prompt/templates/review-prompt.ts
+++ b/src/create-prompt/templates/review-prompt.ts
@@ -12,6 +12,12 @@ export function generateReviewPrompt(context: PreparedContext): string {
   const headSha = context.prBranchData?.headRefOid ?? "unknown";
   const baseRefName = context.eventData.baseBranch ?? "unknown";
 
+  const diffPath =
+    context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff";
+  const commentsPath =
+    context.reviewArtifacts?.commentsPath ??
+    "$RUNNER_TEMP/droid-prompts/existing_comments.json";
+
   return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}.
 The gh CLI is installed and authenticated via GH_TOKEN.
 
@@ -24,6 +30,29 @@ The gh CLI is installed and authenticated via GH_TOKEN.
 * PR Base Ref: ${baseRefName}
 * The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output.
 
+### Pre-computed Review Artifacts
+
+The following files have been pre-computed and contain the COMPLETE data for this PR:
+
+* **Full PR Diff**: \`${diffPath}\` - Contains the COMPLETE diff of ALL changed files (already computed via \`git merge-base\` and \`git diff\`)
+* **Existing Comments**: \`${commentsPath}\` - Contains all existing PR comments and reviews in JSON format
+
+**IMPORTANT**: Use these pre-computed files instead of running git or gh commands to fetch diff/comments. This ensures you have access to the COMPLETE data without truncation.
+
+---
+
+## CRITICAL INSTRUCTION
+
+**DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE IN THE DIFF.**
+
+You MUST:
+1. Read the ENTIRE \`${diffPath}\` file first (use offset/limit if needed for large files)
+2. Create a mental checklist of ALL files that were changed
+3. Review EACH file systematically - do not skip any file
+4. Only submit your review AFTER you have analyzed every single changed file
+
+If the diff is large, work through it methodically. Do not rush. Do not skip files. The quality of your review depends on thoroughness.
+
 ---
 
 ## Objectives
@@ -42,22 +71,24 @@ Follow these phases **in order**. Do not submit findings until Phase 1 and Phase
 
 ## Phase 1: Context Gathering (REQUIRED — do not report bugs yet)
 
-1. Inspect existing comments:
-   \`gh pr view ${prNumber} --repo ${repoFullName} --json comments,reviews\`
+1. **Read existing comments** from the pre-computed file:
+   \`Read ${commentsPath}\`
 
-2. Compute the exact merge diff:
+2. **Read the COMPLETE diff** from the pre-computed file:
+   \`Read ${diffPath}\`
+   
+   If the file is large (>2400 lines), read it in chunks using offset/limit parameters.
+   **DO NOT proceed until you have read the ENTIRE diff.**
 
-   * \`git fetch origin ${baseRefName}:refs/remotes/origin/${baseRefName}\`
-   * \`MERGE_BASE=$(git merge-base HEAD refs/remotes/origin/${baseRefName})\`
-   * \`git --no-pager diff $MERGE_BASE..HEAD\`
+3. **List all changed files** - After reading the diff, explicitly list every file that was changed. This is your checklist.
 
-3. For **each file in the diff**, gather context:
+4. For **each file in the diff**, gather context:
 
    * New imports → Grep to confirm the symbol exists
    * New/modified functions → Grep for callers to understand usage
    * Data-processing code → Read surrounding code to infer expected types
 
-4. Do **not** identify or report bugs yet. This phase is for understanding only.
+5. Do **not** identify or report bugs yet. This phase is for understanding only.
 
 ---
 
diff --git a/src/create-prompt/types.ts b/src/create-prompt/types.ts
index 75af3a4..caffddc 100644
--- a/src/create-prompt/types.ts
+++ b/src/create-prompt/types.ts
@@ -103,6 +103,11 @@ export type EventData =
   | PullRequestEvent
   | PullRequestTargetEvent;
 
+export type ReviewArtifacts = {
+  diffPath: string;
+  commentsPath: string;
+};
+
 export type PreparedContext = CommonFields & {
   eventData: EventData;
   githubContext?: GitHubContext;
@@ -110,4 +115,5 @@ export type PreparedContext = CommonFields & {
     headRefName: string;
     headRefOid: string;
   };
+  reviewArtifacts?: ReviewArtifacts;
 };
diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts
index bc734c5..3d11b87 100644
--- a/src/tag/commands/review.ts
+++ b/src/tag/commands/review.ts
@@ -1,7 +1,10 @@
 import * as core from "@actions/core";
+import { execSync } from "child_process";
+import { writeFile, mkdir } from "fs/promises";
 import type { GitHubContext } from "../../github/context";
 import { fetchPRBranchData } from "../../github/data/pr-fetcher";
 import { createPrompt } from "../../create-prompt";
+import type { ReviewArtifacts } from "../../create-prompt";
 import { prepareMcpTools } from "../../mcp/install-mcp-server";
 import { createInitialComment } from "../../github/operations/comments/create-initial";
 import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools";
@@ -10,6 +13,90 @@ import { generateReviewPrompt } from "../../create-prompt/templates/review-promp
 import type { Octokits } from "../../github/api/client";
 import type { PrepareResult } from "../../prepare/types";
 
+const DIFF_MAX_BUFFER = 50 * 1024 * 1024; // 50MB buffer for large diffs
+
+async function computeAndStoreDiff(
+  baseRef: string,
+  tempDir: string,
+): Promise<string> {
+  const promptsDir = `${tempDir}/droid-prompts`;
+  await mkdir(promptsDir, { recursive: true });
+
+  // Unshallow the repo if it's a shallow clone (needed for merge-base to work)
+  try {
+    execSync("git fetch --unshallow", { encoding: "utf8", stdio: "pipe" });
+    console.log("Unshallowed repository");
+  } catch (e) {
+    // Already unshallowed or not a shallow clone, continue
+    console.log("Repository already has full history");
+  }
+
+  // Fetch the base branch (it may not exist locally yet)
+  try {
+    execSync(
+      `git fetch origin ${baseRef}:refs/remotes/origin/${baseRef}`,
+      { encoding: "utf8", stdio: "pipe" },
+    );
+    console.log(`Fetched base branch: ${baseRef}`);
+  } catch (e) {
+    // Branch might already exist, continue
+    console.log(`Base branch fetch skipped (may already exist): ${baseRef}`);
+  }
+
+  const mergeBase = execSync(
+    `git merge-base HEAD refs/remotes/origin/${baseRef}`,
+    { encoding: "utf8" },
+  ).trim();
+
+  const diff = execSync(`git --no-pager diff ${mergeBase}..HEAD`, {
+    encoding: "utf8",
+    maxBuffer: DIFF_MAX_BUFFER,
+  });
+
+  const diffPath = `${promptsDir}/pr.diff`;
+  await writeFile(diffPath, diff);
+  console.log(`Stored PR diff (${diff.length} bytes) at ${diffPath}`);
+  return diffPath;
+}
+
+async function fetchAndStoreComments(
+  octokit: Octokits,
+  owner: string,
+  repo: string,
+  prNumber: number,
+  tempDir: string,
+): Promise<string> {
+  const promptsDir = `${tempDir}/droid-prompts`;
+  await mkdir(promptsDir, { recursive: true });
+
+  const [issueComments, reviewComments] = await Promise.all([
+    octokit.rest.issues.listComments({
+      owner,
+      repo,
+      issue_number: prNumber,
+      per_page: 100,
+    }),
+    octokit.rest.pulls.listReviewComments({
+      owner,
+      repo,
+      pull_number: prNumber,
+      per_page: 100,
+    }),
+  ]);
+
+  const comments = {
+    issueComments: issueComments.data,
+    reviewComments: reviewComments.data,
+  };
+
+  const commentsPath = `${promptsDir}/existing_comments.json`;
+  await writeFile(commentsPath, JSON.stringify(comments, null, 2));
+  console.log(
+    `Stored existing comments (${issueComments.data.length} issue, ${reviewComments.data.length} review) at ${commentsPath}`,
+  );
+  return commentsPath;
+}
+
 type ReviewCommandOptions = {
   context: GitHubContext;
   octokit: Octokits;
@@ -46,6 +133,46 @@ export async function prepareReviewMode({
     currentBranch: prData.headRefName,
   };
 
+  // Checkout the PR branch before computing diff
+  // This ensures HEAD points to the PR head commit, not the merge commit or default branch
+  console.log(
+    `Checking out PR #${context.entityNumber} branch for diff computation...`,
+  );
+  try {
+    execSync("git reset --hard HEAD", { encoding: "utf8", stdio: "pipe" });
+    execSync(`gh pr checkout ${context.entityNumber}`, {
+      encoding: "utf8",
+      stdio: "pipe",
+      env: { ...process.env, GH_TOKEN: githubToken },
+    });
+    console.log(
+      `Successfully checked out PR branch: ${execSync("git rev-parse --abbrev-ref HEAD", { encoding: "utf8" }).trim()}`,
+    );
+  } catch (e) {
+    console.error(`Failed to checkout PR branch: ${e}`);
+    throw new Error(
+      `Failed to checkout PR #${context.entityNumber} branch for review`,
+    );
+  }
+
+  // Pre-compute review artifacts (diff and existing comments)
+  const tempDir = process.env.RUNNER_TEMP || "/tmp";
+  const [diffPath, commentsPath] = await Promise.all([
+    computeAndStoreDiff(prData.baseRefName, tempDir),
+    fetchAndStoreComments(
+      octokit,
+      context.repository.owner,
+      context.repository.repo,
+      context.entityNumber,
+      tempDir,
+    ),
+  ]);
+
+  const reviewArtifacts: ReviewArtifacts = {
+    diffPath,
+    commentsPath,
+  };
+
   await createPrompt({
     githubContext: context,
     commentId,
@@ -56,6 +183,7 @@ export async function prepareReviewMode({
       headRefOid: prData.headRefOid,
     },
     generatePrompt: generateReviewPrompt,
+    reviewArtifacts,
   });
   core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review");
 
@@ -104,10 +232,10 @@ export async function prepareReviewMode({
   const reviewModel = process.env.REVIEW_MODEL?.trim();
   const reasoningEffort = process.env.REASONING_EFFORT?.trim();
 
-  // Default behavior (behind the scenes): if neither is provided, run GPT-5.2 at high reasoning.
+  // Default behavior (behind the scenes): if neither is provided, run GPT-5.2 at xhigh reasoning.
   if (!reviewModel && !reasoningEffort) {
     droidArgParts.push(`--model "gpt-5.2"`);
-    droidArgParts.push(`--reasoning-effort "high"`);
+    droidArgParts.push(`--reasoning-effort "xhigh"`);
   } else {
     // Add model override if specified
     if (reviewModel) {
diff --git a/test/create-prompt/templates/review-prompt.test.ts b/test/create-prompt/templates/review-prompt.test.ts
index ec23288..0d0e1da 100644
--- a/test/create-prompt/templates/review-prompt.test.ts
+++ b/test/create-prompt/templates/review-prompt.test.ts
@@ -29,10 +29,58 @@ describe("generateReviewPrompt", () => {
 
     expect(prompt).toContain("## Objectives");
     expect(prompt).toContain("Re-check existing review comments");
-    expect(prompt).toContain("git merge-base");
-    expect(prompt).toContain("git --no-pager diff $MERGE_BASE..HEAD");
     expect(prompt).toContain("github_inline_comment___create_inline_comment");
-    expect(prompt).toContain("**Do NOT call** `github_pr___resolve_review_thread`");
+    expect(prompt).toContain(
+      "**Do NOT call** `github_pr___resolve_review_thread`",
+    );
+  });
+
+  it("includes pre-computed artifact references when provided", () => {
+    const context = createBaseContext({
+      reviewArtifacts: {
+        diffPath: "/tmp/test/pr.diff",
+        commentsPath: "/tmp/test/existing_comments.json",
+      },
+    });
+
+    const prompt = generateReviewPrompt(context);
+
+    expect(prompt).toContain("### Pre-computed Review Artifacts");
+    expect(prompt).toContain("/tmp/test/pr.diff");
+    expect(prompt).toContain("/tmp/test/existing_comments.json");
+    expect(prompt).toContain("COMPLETE diff");
+  });
+
+  it("includes critical instruction to review all files", () => {
+    const context = createBaseContext();
+
+    const prompt = generateReviewPrompt(context);
+
+    expect(prompt).toContain("## CRITICAL INSTRUCTION");
+    expect(prompt).toContain(
+      "DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE",
+    );
+    expect(prompt).toContain("Review EACH file systematically");
+  });
+
+  it("instructs to read from pre-computed files in Phase 1", () => {
+    const context = createBaseContext({
+      reviewArtifacts: {
+        diffPath: "/tmp/droid/pr.diff",
+        commentsPath: "/tmp/droid/comments.json",
+      },
+    });
+
+    const prompt = generateReviewPrompt(context);
+
+    expect(prompt).toContain(
+      "Read existing comments** from the pre-computed file",
+    );
+    expect(prompt).toContain("Read /tmp/droid/comments.json");
+    expect(prompt).toContain(
+      "Read the COMPLETE diff** from the pre-computed file",
+    );
+    expect(prompt).toContain("Read /tmp/droid/pr.diff");
   });
 
   it("emphasizes accuracy gates and bug detection guidelines", () => {
@@ -48,7 +96,9 @@ describe("generateReviewPrompt", () => {
   it("describes submission guidance", () => {
     const prompt = generateReviewPrompt(createBaseContext());
 
-    expect(prompt).toContain("Use `github_inline_comment___create_inline_comment`");
+    expect(prompt).toContain(
+      "Use `github_inline_comment___create_inline_comment`",
+    );
     expect(prompt).toContain("Do **not** approve or request changes");
     expect(prompt).toContain("github_pr___submit_review");
     expect(prompt).toContain("### When NOT to submit");
diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts
index e31db2c..af21e1d 100644
--- a/test/modes/tag/review-command.test.ts
+++ b/test/modes/tag/review-command.test.ts
@@ -6,6 +6,8 @@ import { createMockContext } from "../../mockContext";
 import * as promptModule from "../../../src/create-prompt";
 import * as mcpInstaller from "../../../src/mcp/install-mcp-server";
 import * as comments from "../../../src/github/operations/comments/create-initial";
+import * as childProcess from "child_process";
+import * as fsPromises from "fs/promises";
 
 const MOCK_PR_DATA = {
   title: "PR for review",
@@ -27,16 +29,21 @@ const MOCK_PR_DATA = {
 describe("prepareReviewMode", () => {
   const originalArgs = process.env.DROID_ARGS;
   const originalReviewModel = process.env.REVIEW_MODEL;
+  const originalRunnerTemp = process.env.RUNNER_TEMP;
   let graphqlSpy: ReturnType<typeof spyOn>;
   let promptSpy: ReturnType<typeof spyOn>;
   let mcpSpy: ReturnType<typeof spyOn>;
   let setOutputSpy: ReturnType<typeof spyOn>;
   let createInitialSpy: ReturnType<typeof spyOn>;
   let exportVariableSpy: ReturnType<typeof spyOn>;
+  let execSyncSpy: ReturnType<typeof spyOn>;
+  let writeFileSpy: ReturnType<typeof spyOn>;
+  let mkdirSpy: ReturnType<typeof spyOn>;
 
   beforeEach(() => {
     process.env.DROID_ARGS = "";
     delete process.env.REVIEW_MODEL;
+    process.env.RUNNER_TEMP = "/tmp/test-runner";
 
     promptSpy = spyOn(promptModule, "createPrompt").mockResolvedValue();
     mcpSpy = spyOn(mcpInstaller, "prepareMcpTools").mockResolvedValue(
@@ -50,6 +57,21 @@ describe("prepareReviewMode", () => {
     exportVariableSpy = spyOn(core, "exportVariable").mockImplementation(
       () => {},
     );
+    // Mock execSync for git commands
+    execSyncSpy = spyOn(childProcess, "execSync").mockImplementation(
+      ((cmd: string) => {
+        if (cmd.includes("merge-base")) {
+          return "abc123def456\n";
+        }
+        if (cmd.includes("diff")) {
+          return "diff --git a/file.ts b/file.ts\n+added line\n";
+        }
+        return "";
+      }) as typeof childProcess.execSync,
+    );
+    // Mock file system operations
+    writeFileSpy = spyOn(fsPromises, "writeFile").mockResolvedValue();
+    mkdirSpy = spyOn(fsPromises, "mkdir").mockResolvedValue(undefined);
   });
 
   afterEach(() => {
@@ -59,12 +81,20 @@ describe("prepareReviewMode", () => {
     setOutputSpy.mockRestore();
     createInitialSpy.mockRestore();
     exportVariableSpy.mockRestore();
+    execSyncSpy.mockRestore();
+    writeFileSpy.mockRestore();
+    mkdirSpy.mockRestore();
     process.env.DROID_ARGS = originalArgs;
     if (originalReviewModel !== undefined) {
       process.env.REVIEW_MODEL = originalReviewModel;
     } else {
       delete process.env.REVIEW_MODEL;
     }
+    if (originalRunnerTemp !== undefined) {
+      process.env.RUNNER_TEMP = originalRunnerTemp;
+    } else {
+      delete process.env.RUNNER_TEMP;
+    }
   });
 
   it("prepares review flow with limited toolset when tracking comment exists", async () => {
@@ -81,7 +111,14 @@ describe("prepareReviewMode", () => {
     });
 
     const octokit = {
-      rest: {},
+      rest: {
+        issues: {
+          listComments: () => Promise.resolve({ data: [] }),
+        },
+        pulls: {
+          listReviewComments: () => Promise.resolve({ data: [] }),
+        },
+      },
       graphql: () =>
         Promise.resolve({
           repository: {
@@ -166,7 +203,14 @@ describe("prepareReviewMode", () => {
     });
 
     const octokit = {
-      rest: {},
+      rest: {
+        issues: {
+          listComments: () => Promise.resolve({ data: [] }),
+        },
+        pulls: {
+          listReviewComments: () => Promise.resolve({ data: [] }),
+        },
+      },
       graphql: () =>
         Promise.resolve({
           repository: {
@@ -227,7 +271,14 @@ describe("prepareReviewMode", () => {
     });
 
     const octokit = {
-      rest: {},
+      rest: {
+        issues: {
+          listComments: () => Promise.resolve({ data: [] }),
+        },
+        pulls: {
+          listReviewComments: () => Promise.resolve({ data: [] }),
+        },
+      },
       graphql: () =>
         Promise.resolve({
           repository: {
@@ -260,7 +311,9 @@ describe("prepareReviewMode", () => {
     const droidArgsCall = setOutputSpy.mock.calls.find(
       (call: unknown[]) => call[0] === "droid_args",
     ) as [string, string] | undefined;
-    expect(droidArgsCall?.[1]).toContain('--model "claude-sonnet-4-5-20250929"');
+    expect(droidArgsCall?.[1]).toContain(
+      '--model "claude-sonnet-4-5-20250929"',
+    );
   });
 
   it("does not add --model flag when REVIEW_MODEL is empty", async () => {
@@ -280,7 +333,14 @@ describe("prepareReviewMode", () => {
     });
 
     const octokit = {
-      rest: {},
+      rest: {
+        issues: {
+          listComments: () => Promise.resolve({ data: [] }),
+        },
+        pulls: {
+          listReviewComments: () => Promise.resolve({ data: [] }),
+        },
+      },
       graphql: () =>
         Promise.resolve({
           repository: {
@@ -313,8 +373,8 @@ describe("prepareReviewMode", () => {
     const droidArgsCall = setOutputSpy.mock.calls.find(
       (call: unknown[]) => call[0] === "droid_args",
     ) as [string, string] | undefined;
-    // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, we default to gpt-5.2 at high reasoning.
+    // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, we default to gpt-5.2 at xhigh reasoning.
     expect(droidArgsCall?.[1]).toContain('--model "gpt-5.2"');
-    expect(droidArgsCall?.[1]).toContain('--reasoning-effort "high"');
+    expect(droidArgsCall?.[1]).toContain('--reasoning-effort "xhigh"');
   });
 });

From e53cbd859e350442bda5a6efb08bde8e4ac360a6 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Thu, 15 Jan 2026 13:40:53 -0800
Subject: [PATCH 16/33] remove revoking of gh app token

---
 action.yml | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/action.yml b/action.yml
index 3125856..bee4e59 100644
--- a/action.yml
+++ b/action.yml
@@ -237,14 +237,3 @@ runs:
           ~/.factory/sessions/*
         if-no-files-found: ignore
         retention-days: 7
-
-    - name: Revoke app token
-      if: always() && inputs.github_token == '' && steps.prepare.outputs.skipped_due_to_workflow_validation_mismatch != 'true'
-      shell: bash
-      run: |
-        curl -L \
-          -X DELETE \
-          -H "Accept: application/vnd.github+json" \
-          -H "Authorization: Bearer ${{ steps.prepare.outputs.GITHUB_TOKEN }}" \
-          -H "X-GitHub-Api-Version: 2022-11-28" \
-          ${GITHUB_API_URL:-https://api.github.com}/installation/token

From 9ffb2edca7cd83dae25818a58ff5be26fb34f3c3 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Thu, 15 Jan 2026 13:58:24 -0800
Subject: [PATCH 17/33] switch gpt-5.2 reasoning effort back to high

---
 src/tag/commands/review.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts
index 3d11b87..dd43d14 100644
--- a/src/tag/commands/review.ts
+++ b/src/tag/commands/review.ts
@@ -232,10 +232,10 @@ export async function prepareReviewMode({
   const reviewModel = process.env.REVIEW_MODEL?.trim();
   const reasoningEffort = process.env.REASONING_EFFORT?.trim();
 
-  // Default behavior (behind the scenes): if neither is provided, run GPT-5.2 at xhigh reasoning.
+  // Default behavior (behind the scenes): if neither is provided, run GPT-5.2 at high reasoning.
   if (!reviewModel && !reasoningEffort) {
     droidArgParts.push(`--model "gpt-5.2"`);
-    droidArgParts.push(`--reasoning-effort "xhigh"`);
+    droidArgParts.push(`--reasoning-effort "high"`);
   } else {
     // Add model override if specified
     if (reviewModel) {

From 86ec27722d1e31c299a6beba3b924fec85338ebd Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 11:47:37 -0800
Subject: [PATCH 18/33] fix tests

---
 base-action/test/run-droid-mcp.test.ts |  1 +
 test/integration/review-flow.test.ts   | 27 +++++++++++++++++++++++---
 test/modes/tag/review-command.test.ts  |  2 +-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/base-action/test/run-droid-mcp.test.ts b/base-action/test/run-droid-mcp.test.ts
index ea652a2..fa1bc16 100644
--- a/base-action/test/run-droid-mcp.test.ts
+++ b/base-action/test/run-droid-mcp.test.ts
@@ -90,6 +90,7 @@ mock.module("child_process", () => ({
     });
   },
   spawn: mockSpawn,
+  execSync: (_cmd: string) => "",
 }));
 
 type RunDroidModule = typeof import("../src/run-droid");
diff --git a/test/integration/review-flow.test.ts b/test/integration/review-flow.test.ts
index 6095115..3bcb753 100644
--- a/test/integration/review-flow.test.ts
+++ b/test/integration/review-flow.test.ts
@@ -15,6 +15,7 @@ import * as createInitial from "../../src/github/operations/comments/create-init
 import * as mcpInstaller from "../../src/mcp/install-mcp-server";
 import * as actorValidation from "../../src/github/validation/actor";
 import * as core from "@actions/core";
+import * as childProcess from "node:child_process";
 
 describe("review command integration", () => {
   const originalRunnerTemp = process.env.RUNNER_TEMP;
@@ -26,6 +27,7 @@ describe("review command integration", () => {
   let actorSpy: ReturnType<typeof spyOn>;
   let setOutputSpy: ReturnType<typeof spyOn>;
   let exportVarSpy: ReturnType<typeof spyOn>;
+  let execSyncSpy: ReturnType<typeof spyOn>;
 
   beforeEach(async () => {
     tmpDir = await mkdtemp(path.join(os.tmpdir(), "review-int-"));
@@ -43,6 +45,16 @@ describe("review command integration", () => {
     actorSpy = spyOn(actorValidation, "checkHumanActor").mockResolvedValue();
     setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {});
     exportVarSpy = spyOn(core, "exportVariable").mockImplementation(() => {});
+
+    execSyncSpy = spyOn(childProcess, "execSync").mockImplementation(
+      ((cmd: string) => {
+        if (cmd.includes("merge-base")) return "abc123def456\n";
+        if (cmd.includes("git --no-pager diff")) {
+          return "diff --git a/file.ts b/file.ts\n+added line\n";
+        }
+        return "";
+      }) as typeof childProcess.execSync,
+    );
   });
 
   afterEach(async () => {
@@ -52,6 +64,7 @@ describe("review command integration", () => {
     actorSpy.mockRestore();
     setOutputSpy.mockRestore();
     exportVarSpy.mockRestore();
+    execSyncSpy.mockRestore();
 
     if (process.env.RUNNER_TEMP) {
       await rm(process.env.RUNNER_TEMP, { recursive: true, force: true });
@@ -96,7 +109,14 @@ describe("review command integration", () => {
     });
 
     const octokit = { 
-      rest: {}, 
+      rest: {
+        issues: {
+          listComments: () => Promise.resolve({ data: [] }),
+        },
+        pulls: {
+          listReviewComments: () => Promise.resolve({ data: [] }),
+        },
+      }, 
       graphql: () => Promise.resolve({
         repository: {
           pullRequest: {
@@ -148,8 +168,9 @@ describe("review command integration", () => {
     expect(prompt).toContain("github_inline_comment___create_inline_comment");
     expect(prompt).toContain("## **Reporting Gate (CRITICAL)**");
     expect(prompt).toContain("## Priority Levels");
-    expect(prompt).toContain("gh pr view 7 --repo test-owner/test-repo --json comments,reviews");
-    expect(prompt).toContain("git --no-pager diff $MERGE_BASE..HEAD");
+    expect(prompt).toContain("Pre-computed Review Artifacts");
+    expect(prompt).toContain("Full PR Diff");
+    expect(prompt).toContain("Existing Comments");
     expect(prompt).toContain("**Do NOT call** `github_pr___resolve_review_thread`");
 
     const droidArgsCall = setOutputSpy.mock.calls.find(
diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts
index af21e1d..9a59d6d 100644
--- a/test/modes/tag/review-command.test.ts
+++ b/test/modes/tag/review-command.test.ts
@@ -375,6 +375,6 @@ describe("prepareReviewMode", () => {
     ) as [string, string] | undefined;
     // When neither REVIEW_MODEL nor REASONING_EFFORT is provided, we default to gpt-5.2 at xhigh reasoning.
     expect(droidArgsCall?.[1]).toContain('--model "gpt-5.2"');
-    expect(droidArgsCall?.[1]).toContain('--reasoning-effort "xhigh"');
+    expect(droidArgsCall?.[1]).toContain('--reasoning-effort "high"');
   });
 });

From 5da3f3fef34c6e7eee86126d1d58604b1930dfac Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 11:55:06 -0800
Subject: [PATCH 19/33] test validator run

---
 action.yml                                    |  58 ++++-
 src/create-prompt/index.ts                    |   1 +
 .../templates/review-candidates-prompt.ts     | 137 +++++++++++
 .../templates/review-validator-prompt.ts      | 170 ++++++++++++++
 src/entrypoints/prepare-validator.ts          |  51 ++++
 src/tag/commands/review-validator.ts          | 222 ++++++++++++++++++
 src/tag/commands/review.ts                    |  41 +++-
 test/entrypoints/prepare-validator.test.ts    |  64 +++++
 8 files changed, 732 insertions(+), 12 deletions(-)
 create mode 100644 src/create-prompt/templates/review-candidates-prompt.ts
 create mode 100644 src/create-prompt/templates/review-validator-prompt.ts
 create mode 100644 src/entrypoints/prepare-validator.ts
 create mode 100644 src/tag/commands/review-validator.ts
 create mode 100644 test/entrypoints/prepare-validator.test.ts

diff --git a/action.yml b/action.yml
index bee4e59..390bde6 100644
--- a/action.yml
+++ b/action.yml
@@ -67,6 +67,18 @@ inputs:
     description: "Override reasoning effort for review flows (passed to Droid Exec as --reasoning-effort). If empty and review_model is also empty, the action defaults internally to gpt-5.2 at high reasoning."
     required: false
     default: ""
+  review_use_validator:
+    description: "Enable two-pass review: generate candidate comments to JSON, then validate and post only approved ones."
+    required: false
+    default: "true"
+  review_candidates_path:
+    description: "Path to write review candidates JSON (run #1 when review_use_validator=true)."
+    required: false
+    default: "${{ runner.temp }}/droid-prompts/review_candidates.json"
+  review_validated_path:
+    description: "Path to write review validated JSON (run #2 when review_use_validator=true)."
+    required: false
+    default: "${{ runner.temp }}/droid-prompts/review_validated.json"
   fill_model:
     description: "Override the model used for PR description fill (e.g., 'claude-sonnet-4-5-20250929', 'gpt-5.1-codex'). Only applies to fill flows."
     required: false
@@ -137,6 +149,9 @@ runs:
         AUTOMATIC_REVIEW: ${{ inputs.automatic_review }}
         REVIEW_MODEL: ${{ inputs.review_model }}
         REASONING_EFFORT: ${{ inputs.reasoning_effort }}
+        REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }}
+        REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }}
+        REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }}
         FILL_MODEL: ${{ inputs.fill_model }}
         ADDITIONAL_PERMISSIONS: ${{ inputs.additional_permissions }}
         DROID_ARGS: ${{ inputs.droid_args }}
@@ -204,6 +219,46 @@ runs:
         DETAILED_PERMISSION_MESSAGES: "1"
         FACTORY_API_KEY: ${{ inputs.factory_api_key }}
 
+    - name: Prepare validator
+      id: prepare_validator
+      if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true'
+      shell: bash
+      run: |
+        bun run ${GITHUB_ACTION_PATH}/src/entrypoints/prepare-validator.ts
+      env:
+        GITHUB_TOKEN: ${{ steps.prepare.outputs.github_token }}
+        REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }}
+        REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }}
+        REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }}
+        DROID_COMMENT_ID: ${{ steps.prepare.outputs.droid_comment_id }}
+
+    - name: Run Droid Exec (validator)
+      id: droid_validator
+      if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true'
+      shell: bash
+      run: |
+
+        # Run the base-action
+        bun run ${GITHUB_ACTION_PATH}/base-action/src/index.ts
+      env:
+        # Base-action inputs
+        INPUT_PROMPT_FILE: ${{ runner.temp }}/droid-prompts/droid-prompt.txt
+        INPUT_SETTINGS: ${{ inputs.settings }}
+        INPUT_DROID_ARGS: ${{ steps.prepare_validator.outputs.droid_args }}
+        INPUT_MCP_TOOLS: ${{ steps.prepare_validator.outputs.mcp_tools }}
+        INPUT_EXPERIMENTAL_SLASH_COMMANDS_DIR: ${{ github.action_path }}/slash-commands
+        INPUT_ACTION_INPUTS_PRESENT: ${{ steps.prepare.outputs.action_inputs_present }}
+        INPUT_PATH_TO_DROID_EXECUTABLE: ${{ inputs.path_to_droid_executable }}
+        INPUT_PATH_TO_BUN_EXECUTABLE: ${{ inputs.path_to_bun_executable }}
+        INPUT_SHOW_FULL_OUTPUT: ${{ inputs.show_full_output }}
+
+        # Model configuration
+        GITHUB_TOKEN: ${{ steps.prepare.outputs.GITHUB_TOKEN }}
+        NODE_VERSION: ${{ env.NODE_VERSION }}
+        DETAILED_PERMISSION_MESSAGES: "1"
+        FACTORY_API_KEY: ${{ inputs.factory_api_key }}
+
+
     - name: Update comment with job link
       if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.droid_comment_id && always()
       shell: bash
@@ -218,7 +273,7 @@ runs:
         GITHUB_EVENT_NAME: ${{ github.event_name }}
         TRIGGER_COMMENT_ID: ${{ github.event.comment.id }}
         IS_PR: ${{ github.event.issue.pull_request != null || github.event_name == 'pull_request_target' || github.event_name == 'pull_request_review_comment' }}
-        DROID_SUCCESS: ${{ steps.droid.outputs.conclusion == 'success' }}
+        DROID_SUCCESS: ${{ (inputs.review_use_validator == 'true' && steps.droid_validator.outputs.conclusion == 'success') || (inputs.review_use_validator != 'true' && steps.droid.outputs.conclusion == 'success') }}
         TRIGGER_USERNAME: ${{ github.event.comment.user.login || github.event.issue.user.login || github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }}
         PREPARE_SUCCESS: ${{ steps.prepare.outcome == 'success' }}
         PREPARE_ERROR: ${{ steps.prepare.outputs.prepare_error || '' }}
@@ -235,5 +290,6 @@ runs:
           ~/.factory/logs/droid-log-single.log
           ~/.factory/logs/console.log
           ~/.factory/sessions/*
+          ${{ runner.temp }}/droid-prompts/**
         if-no-files-found: ignore
         retention-days: 7
diff --git a/src/create-prompt/index.ts b/src/create-prompt/index.ts
index d87c90f..8ca3cd6 100644
--- a/src/create-prompt/index.ts
+++ b/src/create-prompt/index.ts
@@ -28,6 +28,7 @@ const BASE_ALLOWED_TOOLS = [
   "Execute",
   "Edit",
   "Create",
+  "ApplyPatch",
   "Read",
   "Glob",
   "Grep",
diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
new file mode 100644
index 0000000..34619ac
--- /dev/null
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -0,0 +1,137 @@
+import type { PreparedContext } from "../types";
+
+export function generateReviewCandidatesPrompt(context: PreparedContext): string {
+  const prNumber = context.eventData.isPR
+    ? context.eventData.prNumber
+    : context.githubContext && "entityNumber" in context.githubContext
+      ? String(context.githubContext.entityNumber)
+      : "unknown";
+
+  const repoFullName = context.repository;
+  const prHeadRef = context.prBranchData?.headRefName ?? "unknown";
+  const prHeadSha = context.prBranchData?.headRefOid ?? "unknown";
+  const prBaseRef = context.eventData.baseBranch ?? "unknown";
+
+  const diffPath =
+    context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff";
+  const commentsPath =
+    context.reviewArtifacts?.commentsPath ??
+    "$RUNNER_TEMP/droid-prompts/existing_comments.json";
+
+  const reviewCandidatesPath =
+    process.env.REVIEW_CANDIDATES_PATH ??
+    "$RUNNER_TEMP/droid-prompts/review_candidates.json";
+
+  return `You are generating **candidate** inline review comments for PR #${prNumber} in ${repoFullName}.
+
+IMPORTANT: This is Phase 1 of a two-pass review pipeline.
+
+### Context
+
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${prHeadRef}
+* PR Head SHA: ${prHeadSha}
+* PR Base Ref: ${prBaseRef}
+
+### Pre-computed Review Artifacts
+
+The following files have been pre-computed and contain the COMPLETE data for this PR:
+
+* **Full PR Diff**: \`${diffPath}\`
+* **Existing Comments**: \`${commentsPath}\`
+
+### Output
+
+Write your candidates to: \`${reviewCandidatesPath}\`
+
+You must write a single JSON object with the schema below.
+
+### CRITICAL RULES
+
+* **DO NOT** post to GitHub in this run.
+* **DO NOT** call any PR mutation tools (inline comments, submit review, delete/minimize/reply/resolve, etc.).
+* You MAY update the tracking comment for progress.
+
+=======================
+
+## Phase 1: Context Gathering (REQUIRED — do not output yet)
+
+1. Read existing comments:
+   Read \`${commentsPath}\`
+
+2. Read the COMPLETE diff:
+   Read \`${diffPath}\`
+   If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.**
+
+3. List every changed file (your checklist) and review ALL of them.
+
+=======================
+
+## Phase 2: Candidate Generation
+
+Generate **high-confidence, actionable** candidate inline comments following the same standards as the single-pass review:
+
+### Reporting Gate (same as review)
+
+Only include candidates that meet at least one:
+* Definite runtime failure
+* Incorrect logic with a concrete trigger path and wrong outcome
+* Security vulnerability with realistic exploit
+* Data corruption/loss
+* Breaking contract change (discoverable in code/tests)
+
+Do NOT include:
+* Style/naming/formatting
+* "What-if" speculation without a realistic execution path
+* Vague suggestions to add guards/try-catch without a concrete failure
+
+### Deduplication
+
+Use \`${commentsPath}\` to avoid duplicating issues already reported by this bot.
+If an issue appears fixed, do NOT create a new candidate; the validator run will handle replies.
+
+=======================
+
+## Phase 3: Write candidates JSON (REQUIRED)
+
+Write \`${reviewCandidatesPath}\` with this schema:
+
+\`\`\`json
+{
+  "version": 1,
+  "meta": {
+    "repo": "owner/repo",
+    "prNumber": 123,
+    "headSha": "<head sha>",
+    "baseRef": "main",
+    "generatedAt": "<ISO timestamp>"
+  },
+  "comments": [
+    {
+      "path": "src/index.ts",
+      "body": "[P1] Title\n\n1 paragraph.",
+      "line": 42,
+      "startLine": null,
+      "side": "RIGHT",
+      "commit_id": "<head sha>"
+    }
+  ],
+  "reviewSummary": {
+    "body": "1–3 sentence overall assessment"
+  }
+}
+\`\`\`
+
+Notes:
+* \`comments[]\` entries MUST match the input shape of \`github_inline_comment___create_inline_comment\`.
+* Use \`commit_id\` = \`${prHeadSha}\`.
+* \`startLine\` should be \`null\` unless you are making a multi-line comment.
+
+Then write the file using the local file tool.
+
+Tooling note:
+* If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path.
+* Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file.
+`;
+}
diff --git a/src/create-prompt/templates/review-validator-prompt.ts b/src/create-prompt/templates/review-validator-prompt.ts
new file mode 100644
index 0000000..38561cf
--- /dev/null
+++ b/src/create-prompt/templates/review-validator-prompt.ts
@@ -0,0 +1,170 @@
+import type { PreparedContext } from "../types";
+
+export function generateReviewValidatorPrompt(context: PreparedContext): string {
+  const prNumber = context.eventData.isPR
+    ? context.eventData.prNumber
+    : context.githubContext && "entityNumber" in context.githubContext
+      ? String(context.githubContext.entityNumber)
+      : "unknown";
+
+  const repoFullName = context.repository;
+  const prHeadRef = context.prBranchData?.headRefName ?? "unknown";
+  const prHeadSha = context.prBranchData?.headRefOid ?? "unknown";
+  const prBaseRef = context.eventData.baseBranch ?? "unknown";
+
+  const diffPath =
+    context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff";
+  const commentsPath =
+    context.reviewArtifacts?.commentsPath ??
+    "$RUNNER_TEMP/droid-prompts/existing_comments.json";
+
+  const reviewCandidatesPath =
+    process.env.REVIEW_CANDIDATES_PATH ??
+    "$RUNNER_TEMP/droid-prompts/review_candidates.json";
+  const reviewValidatedPath =
+    process.env.REVIEW_VALIDATED_PATH ??
+    "$RUNNER_TEMP/droid-prompts/review_validated.json";
+
+  return `You are validating candidate review comments for PR #${prNumber} in ${repoFullName}.
+
+IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline.
+
+### Context
+
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${prHeadRef}
+* PR Head SHA: ${prHeadSha}
+* PR Base Ref: ${prBaseRef}
+
+### Inputs
+
+Read:
+* Candidates: \`${reviewCandidatesPath}\`
+* Full PR Diff: \`${diffPath}\`
+* Existing Comments: \`${commentsPath}\`
+
+### Outputs
+
+1) Write validated results to: \`${reviewValidatedPath}\`
+2) Post ONLY the approved inline comments to the PR
+3) Submit a PR review summary (if applicable)
+
+=======================
+
+## CRITICAL REQUIREMENTS
+
+1. You MUST read and validate **every** candidate before posting anything.
+2. For each candidate, confirm:
+   * It is a real, actionable bug (not speculative)
+   * There is a realistic trigger path and observable wrong behavior
+   * The anchor is valid (path + side + line/startLine correspond to the diff)
+3. **Posting rule (STRICT):**
+   * Only post comments where \`status === "approved"\`.
+   * Never post rejected items.
+4. Preserve ordering: keep results in the same order as candidates.
+
+=======================
+
+## Phase 1: Load context (REQUIRED)
+
+1. Read existing comments:
+   Read \`${commentsPath}\`
+
+2. Read the COMPLETE diff:
+   Read \`${diffPath}\`
+   If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.**
+
+3. Read candidates:
+   Read \`${reviewCandidatesPath}\`
+
+=======================
+
+## Phase 2: Validate candidates
+
+Apply the same Reporting Gate as review:
+
+### Approve ONLY if at least one is true
+* Definite runtime failure
+* Incorrect logic with a concrete trigger path and wrong outcome
+* Security vulnerability with realistic exploit
+* Data corruption/loss
+* Breaking contract change (discoverable in code/tests)
+
+Reject if:
+* It's speculative / "might" without a concrete trigger
+* It's stylistic / naming / formatting
+* It's not anchored to a valid changed line
+* It's already reported (dedupe against existing comments)
+
+When rejecting, write a concise reason.
+
+=======================
+
+## Phase 3: Write review_validated.json (REQUIRED)
+
+Write \`${reviewValidatedPath}\` with this schema:
+
+\`\`\`json
+{
+  "version": 1,
+  "meta": {
+    "repo": "owner/repo",
+    "prNumber": 123,
+    "headSha": "<head sha>",
+    "baseRef": "main",
+    "validatedAt": "<ISO timestamp>"
+  },
+  "results": [
+    {
+      "status": "approved",
+      "comment": {
+        "path": "src/index.ts",
+        "body": "[P1] Title\n\n1 paragraph.",
+        "line": 42,
+        "startLine": null,
+        "side": "RIGHT",
+        "commit_id": "<head sha>"
+      }
+    },
+    {
+      "status": "rejected",
+      "candidate": {
+        "path": "src/other.ts",
+        "body": "[P2] ...",
+        "line": 10,
+        "startLine": null,
+        "side": "RIGHT",
+        "commit_id": "<head sha>"
+      },
+      "reason": "Not a real bug because ..."
+    }
+  ],
+  "reviewSummary": {
+    "status": "approved",
+    "body": "1–3 sentence overall assessment"
+  }
+}
+\`\`\`
+
+Notes:
+* Use \`commit_id\` = \`${prHeadSha}\`.
+* \`results\` MUST have exactly one entry per candidate, in the same order.
+
+Then write the file using the local file tool.
+
+Tooling note:
+* If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path.
+* Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file.
+
+=======================
+
+## Phase 4: Post approved items
+
+After writing \`${reviewValidatedPath}\`, post comments ONLY for \`status === "approved"\`:
+
+* For each approved entry, call \`github_inline_comment___create_inline_comment\` with the \`comment\` object.
+* Submit a review via \`github_pr___submit_review\` using the summary body (if there are any approved items OR a meaningful overall assessment).
+* Do not approve or request changes.
+`;
+}
diff --git a/src/entrypoints/prepare-validator.ts b/src/entrypoints/prepare-validator.ts
new file mode 100644
index 0000000..f62a942
--- /dev/null
+++ b/src/entrypoints/prepare-validator.ts
@@ -0,0 +1,51 @@
+#!/usr/bin/env bun
+
+import * as core from "@actions/core";
+import { setupGitHubToken } from "../github/token";
+import { createOctokit } from "../github/api/client";
+import { parseGitHubContext, isEntityContext } from "../github/context";
+import { prepareReviewValidatorMode } from "../tag/commands/review-validator";
+
+async function run() {
+  try {
+    const context = parseGitHubContext();
+
+    if (!isEntityContext(context) || !context.isPR) {
+      throw new Error("prepare-validator requires a pull request context");
+    }
+
+    // This entrypoint only makes sense when the workflow input is enabled.
+    if ((process.env.REVIEW_USE_VALIDATOR ?? "").trim() !== "true") {
+      throw new Error("reviewUseValidator must be true to run prepare-validator");
+    }
+
+    const githubToken = await setupGitHubToken();
+    const octokit = createOctokit(githubToken);
+
+    const trackingCommentId = Number(process.env.DROID_COMMENT_ID);
+    if (!trackingCommentId || Number.isNaN(trackingCommentId)) {
+      throw new Error("DROID_COMMENT_ID is required for validator run");
+    }
+
+    const result = await prepareReviewValidatorMode({
+      context,
+      octokit,
+      githubToken,
+      trackingCommentId,
+    });
+
+    core.setOutput("github_token", githubToken);
+    if (result?.mcpTools) core.setOutput("mcp_tools", result.mcpTools);
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    core.setFailed(`Prepare validator step failed with error: ${errorMessage}`);
+    core.setOutput("prepare_error", errorMessage);
+    process.exit(1);
+  }
+}
+
+export default run;
+
+if (import.meta.main) {
+  run();
+}
diff --git a/src/tag/commands/review-validator.ts b/src/tag/commands/review-validator.ts
new file mode 100644
index 0000000..2648ba3
--- /dev/null
+++ b/src/tag/commands/review-validator.ts
@@ -0,0 +1,222 @@
+import * as core from "@actions/core";
+import type { GitHubContext } from "../../github/context";
+import { isEntityContext } from "../../github/context";
+import type { Octokits } from "../../github/api/client";
+import { fetchPRBranchData } from "../../github/data/pr-fetcher";
+import { createPrompt } from "../../create-prompt";
+import type { ReviewArtifacts } from "../../create-prompt";
+import { prepareMcpTools } from "../../mcp/install-mcp-server";
+import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools";
+import type { PrepareResult } from "../../prepare/types";
+import { generateReviewValidatorPrompt } from "../../create-prompt/templates/review-validator-prompt";
+import { execSync } from "child_process";
+import { mkdir, writeFile } from "fs/promises";
+
+const DIFF_MAX_BUFFER = 50 * 1024 * 1024;
+
+async function computeAndStoreDiff(
+  baseRef: string,
+  tempDir: string,
+): Promise<string> {
+  const promptsDir = `${tempDir}/droid-prompts`;
+  await mkdir(promptsDir, { recursive: true });
+
+  try {
+    execSync("git fetch --unshallow", { encoding: "utf8", stdio: "pipe" });
+    console.log("Unshallowed repository");
+  } catch {
+    console.log("Repository already has full history");
+  }
+
+  try {
+    execSync(`git fetch origin ${baseRef}:refs/remotes/origin/${baseRef}`, {
+      encoding: "utf8",
+      stdio: "pipe",
+    });
+    console.log(`Fetched base branch: ${baseRef}`);
+  } catch (e) {
+    console.error(`Failed to fetch base branch ${baseRef}: ${e}`);
+    throw new Error(`Failed to fetch base branch ${baseRef} for review`);
+  }
+
+  const mergeBase = execSync(`git merge-base HEAD origin/${baseRef}`, {
+    encoding: "utf8",
+    stdio: "pipe",
+  }).trim();
+
+  const diff = execSync(`git --no-pager diff ${mergeBase}..HEAD`, {
+    encoding: "utf8",
+    stdio: "pipe",
+    maxBuffer: DIFF_MAX_BUFFER,
+  });
+
+  const diffPath = `${promptsDir}/pr.diff`;
+  await writeFile(diffPath, diff);
+  console.log(`Stored PR diff (${diff.length} bytes) at ${diffPath}`);
+
+  return diffPath;
+}
+
+async function fetchAndStoreComments(
+  octokit: Octokits,
+  owner: string,
+  repo: string,
+  prNumber: number,
+  tempDir: string,
+): Promise<string> {
+  const promptsDir = `${tempDir}/droid-prompts`;
+  await mkdir(promptsDir, { recursive: true });
+
+  const [issueComments, reviewComments] = await Promise.all([
+    octokit.rest.issues.listComments({ owner, repo, issue_number: prNumber }),
+    octokit.rest.pulls.listReviewComments({ owner, repo, pull_number: prNumber }),
+  ]);
+
+  const commentsPath = `${promptsDir}/existing_comments.json`;
+  const payload = {
+    issueComments: issueComments.data,
+    reviewComments: reviewComments.data,
+  };
+  await writeFile(commentsPath, JSON.stringify(payload, null, 2));
+  console.log(
+    `Stored existing comments (${issueComments.data.length} issue, ${reviewComments.data.length} review) at ${commentsPath}`,
+  );
+
+  return commentsPath;
+}
+
+export async function prepareReviewValidatorMode({
+  context,
+  octokit,
+  githubToken,
+  trackingCommentId,
+}: {
+  context: GitHubContext;
+  octokit: Octokits;
+  githubToken: string;
+  trackingCommentId: number;
+}): Promise<PrepareResult> {
+  if (!isEntityContext(context) || !context.isPR) {
+    throw new Error("review validator mode requires pull request context");
+  }
+
+  const prData = await fetchPRBranchData({
+    octokits: octokit,
+    repository: { owner: context.repository.owner, repo: context.repository.repo },
+    prNumber: context.entityNumber,
+  });
+
+  const tempDir = process.env.RUNNER_TEMP || "/tmp";
+
+  // Ensure PR branch is checked out (same as review mode)
+  try {
+    execSync("git reset --hard HEAD", { encoding: "utf8", stdio: "pipe" } as any);
+    execSync(`gh pr checkout ${context.entityNumber}`, {
+      encoding: "utf8",
+      stdio: "pipe",
+      env: { ...process.env, GH_TOKEN: githubToken },
+    });
+    console.log(
+      `Successfully checked out PR branch: ${execSync("git rev-parse --abbrev-ref HEAD", { encoding: "utf8" } as any).trim()}`,
+    );
+  } catch (e) {
+    console.error(`Failed to checkout PR branch: ${e}`);
+    throw new Error(`Failed to checkout PR #${context.entityNumber} branch for review`);
+  }
+
+  const [diffPath, commentsPath] = await Promise.all([
+    computeAndStoreDiff(prData.baseRefName, tempDir),
+    fetchAndStoreComments(
+      octokit,
+      context.repository.owner,
+      context.repository.repo,
+      context.entityNumber,
+      tempDir,
+    ),
+  ]);
+
+  const reviewArtifacts: ReviewArtifacts = { diffPath, commentsPath };
+
+  await createPrompt({
+    githubContext: context,
+    commentId: trackingCommentId,
+    baseBranch: prData.baseRefName,
+    droidBranch: prData.headRefName,
+    prBranchData: {
+      headRefName: prData.headRefName,
+      headRefOid: prData.headRefOid,
+    },
+    generatePrompt: generateReviewValidatorPrompt,
+    reviewArtifacts,
+  });
+
+  core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review");
+
+  const rawUserArgs = process.env.DROID_ARGS || "";
+  const normalizedUserArgs = normalizeDroidArgs(rawUserArgs);
+  const userAllowedMCPTools = parseAllowedTools(normalizedUserArgs).filter(
+    (tool) => tool.startsWith("github_") && tool.includes("___"),
+  );
+
+  const baseTools = [
+    "Read",
+    "Grep",
+    "Glob",
+    "LS",
+    "Execute",
+    "ApplyPatch",
+    "Create",
+    "Edit",
+    "github_comment___update_droid_comment",
+    "github_inline_comment___create_inline_comment",
+  ];
+
+  const validatorTools = ["github_pr___submit_review"];
+
+  const allowedTools = Array.from(
+    new Set([...baseTools, ...validatorTools, ...userAllowedMCPTools]),
+  );
+
+  const mcpTools = await prepareMcpTools({
+    githubToken,
+    owner: context.repository.owner,
+    repo: context.repository.repo,
+    droidCommentId: trackingCommentId.toString(),
+    allowedTools,
+    mode: "tag",
+    context,
+  });
+
+  const droidArgParts: string[] = [];
+  droidArgParts.push(`--enabled-tools "${allowedTools.join(",")}"`);
+
+  const reviewModel = process.env.REVIEW_MODEL?.trim();
+  const reasoningEffort = process.env.REASONING_EFFORT?.trim();
+
+  if (!reviewModel && !reasoningEffort) {
+    droidArgParts.push(`--model "gpt-5.2"`);
+    droidArgParts.push(`--reasoning-effort "high"`);
+  } else {
+    if (reviewModel) droidArgParts.push(`--model "${reviewModel}"`);
+    if (reasoningEffort) droidArgParts.push(`--reasoning-effort "${reasoningEffort}"`);
+  }
+
+  if (normalizedUserArgs) {
+    droidArgParts.push(normalizedUserArgs);
+  }
+
+  core.setOutput("droid_args", droidArgParts.join(" ").trim());
+  core.setOutput("mcp_tools", mcpTools);
+  core.setOutput("review_pr_number", context.entityNumber.toString());
+  core.setOutput("droid_comment_id", trackingCommentId.toString());
+
+  return {
+    commentId: trackingCommentId,
+    branchInfo: {
+      baseBranch: prData.baseRefName,
+      droidBranch: prData.headRefName,
+      currentBranch: prData.headRefName,
+    },
+    mcpTools,
+  };
+}
diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts
index dd43d14..569405f 100644
--- a/src/tag/commands/review.ts
+++ b/src/tag/commands/review.ts
@@ -10,6 +10,7 @@ import { createInitialComment } from "../../github/operations/comments/create-in
 import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools";
 import { isEntityContext } from "../../github/context";
 import { generateReviewPrompt } from "../../create-prompt/templates/review-prompt";
+import { generateReviewCandidatesPrompt } from "../../create-prompt/templates/review-candidates-prompt";
 import type { Octokits } from "../../github/api/client";
 import type { PrepareResult } from "../../prepare/types";
 
@@ -173,6 +174,9 @@ export async function prepareReviewMode({
     commentsPath,
   };
 
+  const reviewUseValidator =
+    (process.env.REVIEW_USE_VALIDATOR ?? "").trim() === "true";
+
   await createPrompt({
     githubContext: context,
     commentId,
@@ -182,7 +186,9 @@ export async function prepareReviewMode({
       headRefName: prData.headRefName,
       headRefOid: prData.headRefOid,
     },
-    generatePrompt: generateReviewPrompt,
+    generatePrompt: reviewUseValidator
+      ? generateReviewCandidatesPrompt
+      : generateReviewPrompt,
     reviewArtifacts,
   });
   core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review");
@@ -199,21 +205,34 @@ export async function prepareReviewMode({
     "Glob",
     "LS",
     "Execute",
+    "Create",
+    "ApplyPatch",
     "github_comment___update_droid_comment",
-    "github_inline_comment___create_inline_comment",
   ];
 
-  const reviewTools = [
-    "github_pr___list_review_comments",
-    "github_pr___submit_review",
-    "github_pr___delete_comment",
-    "github_pr___minimize_comment",
-    "github_pr___reply_to_comment",
-    "github_pr___resolve_review_thread",
-  ];
+  const reviewTools = reviewUseValidator
+    ? []
+    : [
+        "github_inline_comment___create_inline_comment",
+        "github_pr___list_review_comments",
+        "github_pr___submit_review",
+        "github_pr___delete_comment",
+        "github_pr___minimize_comment",
+        "github_pr___reply_to_comment",
+        "github_pr___resolve_review_thread",
+      ];
+
+  const safeUserAllowedMCPTools = reviewUseValidator
+    ? userAllowedMCPTools.filter(
+        (tool) =>
+          tool === "github_comment___update_droid_comment" ||
+          (!tool.startsWith("github_pr___") &&
+            tool !== "github_inline_comment___create_inline_comment"),
+      )
+    : userAllowedMCPTools;
 
   const allowedTools = Array.from(
-    new Set([...baseTools, ...reviewTools, ...userAllowedMCPTools]),
+    new Set([...baseTools, ...reviewTools, ...safeUserAllowedMCPTools]),
   );
 
   const mcpTools = await prepareMcpTools({
diff --git a/test/entrypoints/prepare-validator.test.ts b/test/entrypoints/prepare-validator.test.ts
new file mode 100644
index 0000000..6b3bc80
--- /dev/null
+++ b/test/entrypoints/prepare-validator.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it, spyOn } from "bun:test";
+import * as core from "@actions/core";
+import * as token from "../../src/github/token";
+import * as client from "../../src/github/api/client";
+import * as contextMod from "../../src/github/context";
+import * as validator from "../../src/tag/commands/review-validator";
+
+describe("prepare-validator entrypoint", () => {
+  it("fails when reviewUseValidator is false", async () => {
+    const setFailedSpy = spyOn(core, "setFailed").mockImplementation(() => {});
+    const setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {});
+    const exitSpy = spyOn(process, "exit").mockImplementation((() => {
+      throw new Error("process.exit");
+    }) as any);
+
+    spyOn(contextMod, "parseGitHubContext").mockReturnValue({
+      eventName: "issue_comment",
+      runId: "1",
+      repository: { owner: "o", repo: "r", full_name: "o/r" },
+      actor: "a",
+      inputs: {
+        triggerPhrase: "@droid",
+        assigneeTrigger: "",
+        labelTrigger: "droid",
+        useStickyComment: false,
+        allowedBots: "",
+        allowedNonWriteUsers: "",
+        trackProgress: false,
+        automaticReview: false,
+      },
+      payload: {} as any,
+      entityNumber: 1,
+      isPR: true,
+    } as any);
+
+    process.env.REVIEW_USE_VALIDATOR = "false";
+    process.env.DROID_COMMENT_ID = "123";
+
+    spyOn(token, "setupGitHubToken").mockResolvedValue("token");
+    spyOn(client, "createOctokit").mockReturnValue({} as any);
+    spyOn(validator, "prepareReviewValidatorMode").mockResolvedValue({
+      commentId: 123,
+      branchInfo: { baseBranch: "main", droidBranch: "feat" },
+      mcpTools: "{}",
+    } as any);
+
+    const mod = await import(
+      `../../src/entrypoints/prepare-validator.ts?test=${Math.random()}`,
+    );
+
+    await expect(mod.default()).rejects.toBeTruthy();
+
+    expect(setFailedSpy).toHaveBeenCalled();
+    expect(setOutputSpy).toHaveBeenCalledWith(
+      "prepare_error",
+      expect.stringContaining("reviewUseValidator"),
+    );
+    expect(exitSpy).toHaveBeenCalledWith(1);
+
+    setFailedSpy.mockRestore();
+    setOutputSpy.mockRestore();
+    exitSpy.mockRestore();
+  });
+});

From a7066fbd09c0c4d0ef3dfa78f1d805334975caa6 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 15:56:32 -0800
Subject: [PATCH 20/33] increase candidate volume/coverage

---
 .../templates/review-candidates-prompt.ts     | 26 ++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 34619ac..f5a2c34 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -70,10 +70,34 @@ You must write a single JSON object with the schema below.
 
 ## Phase 2: Candidate Generation
 
-Generate **high-confidence, actionable** candidate inline comments following the same standards as the single-pass review:
+Generate **actionable** candidate inline comments.
+
+This phase is optimized for recall: you MAY include candidates that are **likely-real** (not 100% proven), because Phase 2 (validator) will filter. However, every candidate MUST still include:
+
+* A concrete trigger path (inputs/state that makes it happen)
+* An observable bad outcome (exception type, wrong return, corrupted data, violated security property)
+* The exact relevant symbols (function/class/variable names) so the validator can verify quickly
+
+### Mandatory second-pass recall sweep (REQUIRED)
+
+After completing your first pass, do a second sweep over **every changed file** specifically hunting for bugs in these high-yield categories:
+
+* Optional/None dereferences (Optional types, nullable attributes, missing membership/context)
+* Missing-key errors on external/untrusted dict/JSON payloads (KeyError, NoneType access)
+* Wrong-variable / shadowing mistakes (e.g., using an outer variable instead of the scoped/local one)
+* Type assumption bugs (e.g., numeric ops like math.floor/ceil on datetime/strings)
+* Serializer/validated_data contract mismatches (field is named one thing but code reads another)
+* Abstract base class contract issues (subclass missing required abstract members; runtime TypeError on instantiation)
+* Concurrency/process lifecycle hazards (spawn process types, isinstance guards, join/terminate loops)
+* Offset/cursor semantics mismatches (off-by-one, commit semantics, prev/next cursor behavior)
+* OAuth/security invariants (e.g., OAuth state must be per-flow unpredictable; deterministic state is a vulnerability)
+
+If you find a candidate during this sweep, it must still meet the Reporting Gate below, but do not stop early—complete the sweep across all files.
 
 ### Reporting Gate (same as review)
 
+Do NOT over-focus on P1 crashes. Also include medium-severity correctness/contract issues when they have a concrete trigger and a clear wrong outcome.
+
 Only include candidates that meet at least one:
 * Definite runtime failure
 * Incorrect logic with a concrete trigger path and wrong outcome

From 03d7bc8c1ad00d94ef93a4322be6c790370d54a3 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 17:10:26 -0800
Subject: [PATCH 21/33] increase candidate volume/coverage 2

---
 .../templates/review-candidates-prompt.ts            | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index f5a2c34..0c50444 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -70,13 +70,7 @@ You must write a single JSON object with the schema below.
 
 ## Phase 2: Candidate Generation
 
-Generate **actionable** candidate inline comments.
-
-This phase is optimized for recall: you MAY include candidates that are **likely-real** (not 100% proven), because Phase 2 (validator) will filter. However, every candidate MUST still include:
-
-* A concrete trigger path (inputs/state that makes it happen)
-* An observable bad outcome (exception type, wrong return, corrupted data, violated security property)
-* The exact relevant symbols (function/class/variable names) so the validator can verify quickly
+Generate **actionable** candidate inline comments:
 
 ### Mandatory second-pass recall sweep (REQUIRED)
 
@@ -96,9 +90,7 @@ If you find a candidate during this sweep, it must still meet the Reporting Gate
 
 ### Reporting Gate (same as review)
 
-Do NOT over-focus on P1 crashes. Also include medium-severity correctness/contract issues when they have a concrete trigger and a clear wrong outcome.
-
-Only include candidates that meet at least one:
+Include candidates that meet one of these:
 * Definite runtime failure
 * Incorrect logic with a concrete trigger path and wrong outcome
 * Security vulnerability with realistic exploit

From 10804bd758907ed6f3b7cb8109df43fc3f11b2b8 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 17:31:09 -0800
Subject: [PATCH 22/33] Revert "increase candidate volume/coverage 2"

This reverts commit 03d7bc8c1ad00d94ef93a4322be6c790370d54a3.
---
 .../templates/review-candidates-prompt.ts            | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 0c50444..f5a2c34 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -70,7 +70,13 @@ You must write a single JSON object with the schema below.
 
 ## Phase 2: Candidate Generation
 
-Generate **actionable** candidate inline comments:
+Generate **actionable** candidate inline comments.
+
+This phase is optimized for recall: you MAY include candidates that are **likely-real** (not 100% proven), because Phase 2 (validator) will filter. However, every candidate MUST still include:
+
+* A concrete trigger path (inputs/state that makes it happen)
+* An observable bad outcome (exception type, wrong return, corrupted data, violated security property)
+* The exact relevant symbols (function/class/variable names) so the validator can verify quickly
 
 ### Mandatory second-pass recall sweep (REQUIRED)
 
@@ -90,7 +96,9 @@ If you find a candidate during this sweep, it must still meet the Reporting Gate
 
 ### Reporting Gate (same as review)
 
-Include candidates that meet one of these:
+Do NOT over-focus on P1 crashes. Also include medium-severity correctness/contract issues when they have a concrete trigger and a clear wrong outcome.
+
+Only include candidates that meet at least one:
 * Definite runtime failure
 * Incorrect logic with a concrete trigger path and wrong outcome
 * Security vulnerability with realistic exploit

From c275c9bf3ac655f60a05be25ccd8024759217f79 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 17:31:22 -0800
Subject: [PATCH 23/33] Revert "increase candidate volume/coverage"

This reverts commit a7066fbd09c0c4d0ef3dfa78f1d805334975caa6.
---
 .../templates/review-candidates-prompt.ts     | 26 +------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index f5a2c34..34619ac 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -70,34 +70,10 @@ You must write a single JSON object with the schema below.
 
 ## Phase 2: Candidate Generation
 
-Generate **actionable** candidate inline comments.
-
-This phase is optimized for recall: you MAY include candidates that are **likely-real** (not 100% proven), because Phase 2 (validator) will filter. However, every candidate MUST still include:
-
-* A concrete trigger path (inputs/state that makes it happen)
-* An observable bad outcome (exception type, wrong return, corrupted data, violated security property)
-* The exact relevant symbols (function/class/variable names) so the validator can verify quickly
-
-### Mandatory second-pass recall sweep (REQUIRED)
-
-After completing your first pass, do a second sweep over **every changed file** specifically hunting for bugs in these high-yield categories:
-
-* Optional/None dereferences (Optional types, nullable attributes, missing membership/context)
-* Missing-key errors on external/untrusted dict/JSON payloads (KeyError, NoneType access)
-* Wrong-variable / shadowing mistakes (e.g., using an outer variable instead of the scoped/local one)
-* Type assumption bugs (e.g., numeric ops like math.floor/ceil on datetime/strings)
-* Serializer/validated_data contract mismatches (field is named one thing but code reads another)
-* Abstract base class contract issues (subclass missing required abstract members; runtime TypeError on instantiation)
-* Concurrency/process lifecycle hazards (spawn process types, isinstance guards, join/terminate loops)
-* Offset/cursor semantics mismatches (off-by-one, commit semantics, prev/next cursor behavior)
-* OAuth/security invariants (e.g., OAuth state must be per-flow unpredictable; deterministic state is a vulnerability)
-
-If you find a candidate during this sweep, it must still meet the Reporting Gate below, but do not stop early—complete the sweep across all files.
+Generate **high-confidence, actionable** candidate inline comments following the same standards as the single-pass review:
 
 ### Reporting Gate (same as review)
 
-Do NOT over-focus on P1 crashes. Also include medium-severity correctness/contract issues when they have a concrete trigger and a clear wrong outcome.
-
 Only include candidates that meet at least one:
 * Definite runtime failure
 * Incorrect logic with a concrete trigger path and wrong outcome

From 9502f9e12ab6fcb7e3487075f244ca0ebb3d3937 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 21 Jan 2026 17:37:19 -0800
Subject: [PATCH 24/33] add thoroughness

---
 .../templates/review-candidates-prompt.ts     | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 34619ac..08c6c09 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -66,6 +66,25 @@ You must write a single JSON object with the schema below.
 
 3. List every changed file (your checklist) and review ALL of them.
 
+4. Read the underlying repo files for backend changes (REQUIRED)
+
+For each changed **backend** file, you MUST read the actual file from the checked-out workspace (not just the diff) to get surrounding context.
+
+Backend files include (at minimum):
+* Any changed file under \`src/\` that is NOT under \`static/\` and ends in \`.py\`, \`.ts\`, or \`.js\`.
+
+How to read files:
+* Use the absolute repo root shown in the system info (\`% pwd\`) and append the file path.
+  * Example: if \`pwd\` is \`/home/runner/work/droid-sentry/droid-sentry\` and the diff includes \`src/sentry/api/paginator.py\`, then read:
+    \`/home/runner/work/droid-sentry/droid-sentry/src/sentry/api/paginator.py\`
+* If a file is very large, it is OK to read it in targeted chunks, but you MUST include:
+  * the changed hunk region(s), plus
+  * the definition(s) of any key symbols referenced by your candidates.
+
+Work quota (STRICT):
+* Before writing output, you MUST read at least \`min(10, number_of_changed_backend_files)\` backend files from the repo root.
+* In your final response, list the exact backend file paths you read (for auditability).
+
 =======================
 
 ## Phase 2: Candidate Generation

From 56ab298a10e94e1b08b2d6f24b1052c4113d943e Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Fri, 23 Jan 2026 15:53:30 -0800
Subject: [PATCH 25/33] improvements to candidate and validator prompts

---
 .../templates/review-candidates-prompt.ts     | 150 ++++++------------
 .../templates/review-validator-prompt.ts      | 149 ++++++++---------
 2 files changed, 114 insertions(+), 185 deletions(-)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 08c6c09..462f7b9 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -22,99 +22,32 @@ export function generateReviewCandidatesPrompt(context: PreparedContext): string
     process.env.REVIEW_CANDIDATES_PATH ??
     "$RUNNER_TEMP/droid-prompts/review_candidates.json";
 
-  return `You are generating **candidate** inline review comments for PR #${prNumber} in ${repoFullName}.
+  return `You are a senior staff software engineer and expert code reviewer.
 
-IMPORTANT: This is Phase 1 of a two-pass review pipeline.
+Your task: Review PR #${prNumber} in ${repoFullName} and generate a JSON file with **high-confidence, actionable** review comments that pinpoint genuine issues.
 
-### Context
+<context>
+Repo: ${repoFullName}
+PR Number: ${prNumber}
+PR Head Ref: ${prHeadRef}
+PR Head SHA: ${prHeadSha}
+PR Base Ref: ${prBaseRef}
 
-* Repo: ${repoFullName}
-* PR Number: ${prNumber}
-* PR Head Ref: ${prHeadRef}
-* PR Head SHA: ${prHeadSha}
-* PR Base Ref: ${prBaseRef}
+Precomputed data files:
+- Full PR Diff: \`${diffPath}\`
+- Existing Comments: \`${commentsPath}\`
+</context>
 
-### Pre-computed Review Artifacts
+<review_guidelines>
+- You are currently checked out to the PR branch.
+- Review ALL modified files in the PR branch.
+- Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems.
+- Do NOT duplicate comments already in \`${commentsPath}\`.
+- Only flag issues you are confident about—avoid speculative or stylistic nitpicks.
+</review_guidelines>
 
-The following files have been pre-computed and contain the COMPLETE data for this PR:
-
-* **Full PR Diff**: \`${diffPath}\`
-* **Existing Comments**: \`${commentsPath}\`
-
-### Output
-
-Write your candidates to: \`${reviewCandidatesPath}\`
-
-You must write a single JSON object with the schema below.
-
-### CRITICAL RULES
-
-* **DO NOT** post to GitHub in this run.
-* **DO NOT** call any PR mutation tools (inline comments, submit review, delete/minimize/reply/resolve, etc.).
-* You MAY update the tracking comment for progress.
-
-=======================
-
-## Phase 1: Context Gathering (REQUIRED — do not output yet)
-
-1. Read existing comments:
-   Read \`${commentsPath}\`
-
-2. Read the COMPLETE diff:
-   Read \`${diffPath}\`
-   If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.**
-
-3. List every changed file (your checklist) and review ALL of them.
-
-4. Read the underlying repo files for backend changes (REQUIRED)
-
-For each changed **backend** file, you MUST read the actual file from the checked-out workspace (not just the diff) to get surrounding context.
-
-Backend files include (at minimum):
-* Any changed file under \`src/\` that is NOT under \`static/\` and ends in \`.py\`, \`.ts\`, or \`.js\`.
-
-How to read files:
-* Use the absolute repo root shown in the system info (\`% pwd\`) and append the file path.
-  * Example: if \`pwd\` is \`/home/runner/work/droid-sentry/droid-sentry\` and the diff includes \`src/sentry/api/paginator.py\`, then read:
-    \`/home/runner/work/droid-sentry/droid-sentry/src/sentry/api/paginator.py\`
-* If a file is very large, it is OK to read it in targeted chunks, but you MUST include:
-  * the changed hunk region(s), plus
-  * the definition(s) of any key symbols referenced by your candidates.
-
-Work quota (STRICT):
-* Before writing output, you MUST read at least \`min(10, number_of_changed_backend_files)\` backend files from the repo root.
-* In your final response, list the exact backend file paths you read (for auditability).
-
-=======================
-
-## Phase 2: Candidate Generation
-
-Generate **high-confidence, actionable** candidate inline comments following the same standards as the single-pass review:
-
-### Reporting Gate (same as review)
-
-Only include candidates that meet at least one:
-* Definite runtime failure
-* Incorrect logic with a concrete trigger path and wrong outcome
-* Security vulnerability with realistic exploit
-* Data corruption/loss
-* Breaking contract change (discoverable in code/tests)
-
-Do NOT include:
-* Style/naming/formatting
-* "What-if" speculation without a realistic execution path
-* Vague suggestions to add guards/try-catch without a concrete failure
-
-### Deduplication
-
-Use \`${commentsPath}\` to avoid duplicating issues already reported by this bot.
-If an issue appears fixed, do NOT create a new candidate; the validator run will handle replies.
-
-=======================
-
-## Phase 3: Write candidates JSON (REQUIRED)
-
-Write \`${reviewCandidatesPath}\` with this schema:
+<output_spec>
+Write output to \`${reviewCandidatesPath}\` using this exact schema:
 
 \`\`\`json
 {
@@ -137,20 +70,39 @@ Write \`${reviewCandidatesPath}\` with this schema:
     }
   ],
   "reviewSummary": {
-    "body": "1–3 sentence overall assessment"
+    "body": "1-3 sentence overall assessment"
   }
 }
 \`\`\`
 
-Notes:
-* \`comments[]\` entries MUST match the input shape of \`github_inline_comment___create_inline_comment\`.
-* Use \`commit_id\` = \`${prHeadSha}\`.
-* \`startLine\` should be \`null\` unless you are making a multi-line comment.
-
-Then write the file using the local file tool.
-
-Tooling note:
-* If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path.
-* Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file.
+<schema_details>
+- **version**: Always \`1\`
+
+- **meta**: Metadata object
+  - \`repo\`: "${repoFullName}"
+  - \`prNumber\`: ${prNumber}
+  - \`headSha\`: "${prHeadSha}"
+  - \`baseRef\`: "${prBaseRef}"
+  - \`generatedAt\`: ISO 8601 timestamp (e.g., "2024-01-15T10:30:00Z")
+
+- **comments**: Array of comment objects
+  - \`path\`: Relative file path (e.g., "src/index.ts")
+  - \`body\`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation
+  - \`line\`: Target line number (single-line) or end line number (multi-line). Must be ≥ 0.
+  - \`startLine\`: \`null\` for single-line comments, or start line number for multi-line comments
+  - \`side\`: "RIGHT" for new/modified code (default), "LEFT" only for removed code
+  - \`commit_id\`: "${prHeadSha}"
+
+- **reviewSummary**:
+  - \`body\`: 1-3 sentence overall assessment
+</schema_details>
+</output_spec>
+
+<critical_constraints>
+**DO NOT** post to GitHub.
+**DO NOT** invoke any PR mutation tools (inline comments, submit review, delete/minimize/reply/resolve, etc.).
+**DO NOT** modify any files other than writing to \`${reviewCandidatesPath}\`.
+Output ONLY the JSON file—no additional commentary.
+</critical_constraints>
 `;
 }
diff --git a/src/create-prompt/templates/review-validator-prompt.ts b/src/create-prompt/templates/review-validator-prompt.ts
index 38561cf..1883904 100644
--- a/src/create-prompt/templates/review-validator-prompt.ts
+++ b/src/create-prompt/templates/review-validator-prompt.ts
@@ -25,85 +25,44 @@ export function generateReviewValidatorPrompt(context: PreparedContext): string
     process.env.REVIEW_VALIDATED_PATH ??
     "$RUNNER_TEMP/droid-prompts/review_validated.json";
 
-  return `You are validating candidate review comments for PR #${prNumber} in ${repoFullName}.
+  return `You are a senior staff software engineer and expert code reviewer.
+Your task: Act as a validator agent by reviewing candidate review comments for PR #${prNumber} in ${repoFullName}. Your primary objective is to identify and filter out false positives and unclear/vague comments. For each candidate, compare the comment against the codebase and related diff, and mark each as "approved" (if valid, clear, and actionable) or "rejected" (if it is a false positive, unclear/vague, stylistic, or a duplicate). Write your validation results to \`${reviewValidatedPath}\`, and submit only approved comments to GitHub.
 
-IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline.
+<context>
+Repo: ${repoFullName}
+PR Number: ${prNumber}
+PR Head Ref: ${prHeadRef}
+PR Head SHA: ${prHeadSha}
+PR Base Ref: ${prBaseRef}
 
-### Context
+Input files:
+- Comment Candidates: \`${reviewCandidatesPath}\`
+- Full PR Diff: \`${diffPath}\`
+- Existing Comments on the PR: \`${commentsPath}\`
 
-* Repo: ${repoFullName}
-* PR Number: ${prNumber}
-* PR Head Ref: ${prHeadRef}
-* PR Head SHA: ${prHeadSha}
-* PR Base Ref: ${prBaseRef}
+Output file:
+- Validated results: \`${reviewValidatedPath}\`
+</context>
 
-### Inputs
 
-Read:
-* Candidates: \`${reviewCandidatesPath}\`
-* Full PR Diff: \`${diffPath}\`
-* Existing Comments: \`${commentsPath}\`
+<execution_phases>
+## Phase 1: Validate Comment Candidates
 
-### Outputs
+<validation_criteria>
+- You are currently checked out to the PR branch.
+- For every candidate comment in \`${reviewCandidatesPath}\`, compare it against the codebase and PR diff to determine its validity and clarity.
 
-1) Write validated results to: \`${reviewValidatedPath}\`
-2) Post ONLY the approved inline comments to the PR
-3) Submit a PR review summary (if applicable)
+**Reject** any candidate that meets the following criteria:
+- False positive: Not an actual issue when matched against the codebase context.
+- Unclear or vague: Lacks sufficient detail to be actionable.
+- Stylistic / naming / formatting only.
+- Already reported (duplicate of existing comment).
+- For valid and clear candidates, mark as "approved"; for others, mark as "rejected" with a concise reason (e.g., "false positive", "unclear/vague", etc.).
+</validation_criteria>
 
-=======================
 
-## CRITICAL REQUIREMENTS
-
-1. You MUST read and validate **every** candidate before posting anything.
-2. For each candidate, confirm:
-   * It is a real, actionable bug (not speculative)
-   * There is a realistic trigger path and observable wrong behavior
-   * The anchor is valid (path + side + line/startLine correspond to the diff)
-3. **Posting rule (STRICT):**
-   * Only post comments where \`status === "approved"\`.
-   * Never post rejected items.
-4. Preserve ordering: keep results in the same order as candidates.
-
-=======================
-
-## Phase 1: Load context (REQUIRED)
-
-1. Read existing comments:
-   Read \`${commentsPath}\`
-
-2. Read the COMPLETE diff:
-   Read \`${diffPath}\`
-   If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.**
-
-3. Read candidates:
-   Read \`${reviewCandidatesPath}\`
-
-=======================
-
-## Phase 2: Validate candidates
-
-Apply the same Reporting Gate as review:
-
-### Approve ONLY if at least one is true
-* Definite runtime failure
-* Incorrect logic with a concrete trigger path and wrong outcome
-* Security vulnerability with realistic exploit
-* Data corruption/loss
-* Breaking contract change (discoverable in code/tests)
-
-Reject if:
-* It's speculative / "might" without a concrete trigger
-* It's stylistic / naming / formatting
-* It's not anchored to a valid changed line
-* It's already reported (dedupe against existing comments)
-
-When rejecting, write a concise reason.
-
-=======================
-
-## Phase 3: Write review_validated.json (REQUIRED)
-
-Write \`${reviewValidatedPath}\` with this schema:
+<output_spec>
+Write to \`${reviewValidatedPath}\` using this exact schema:
 
 \`\`\`json
 {
@@ -142,29 +101,47 @@ Write \`${reviewValidatedPath}\` with this schema:
   ],
   "reviewSummary": {
     "status": "approved",
-    "body": "1–3 sentence overall assessment"
+    "body": "1-3 sentence overall assessment"
   }
 }
 \`\`\`
 
-Notes:
-* Use \`commit_id\` = \`${prHeadSha}\`.
-* \`results\` MUST have exactly one entry per candidate, in the same order.
-
-Then write the file using the local file tool.
-
-Tooling note:
-* If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path.
-* Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file.
+<schema_details>
+- **version**: Always \`1\`
 
-=======================
+- **meta**: Metadata object
+  - \`repo\`: "${repoFullName}"
+  - \`prNumber\`: ${prNumber}
+  - \`headSha\`: "${prHeadSha}"
+  - \`baseRef\`: "${prBaseRef}"
+  - \`validatedAt\`: ISO 8601 timestamp
 
-## Phase 4: Post approved items
+- **results**: Array with exactly one entry per candidate (same order)
+  - For approved: \`{ "status": "approved", "comment": {...} }\`
+  - For rejected: \`{ "status": "rejected", "candidate": {...}, "reason": "..." }\`
 
-After writing \`${reviewValidatedPath}\`, post comments ONLY for \`status === "approved"\`:
+- **reviewSummary**:
+  - \`status\`: "approved" or "rejected"
+  - \`body\`: 1-3 sentence overall assessment
 
-* For each approved entry, call \`github_inline_comment___create_inline_comment\` with the \`comment\` object.
-* Submit a review via \`github_pr___submit_review\` using the summary body (if there are any approved items OR a meaningful overall assessment).
-* Do not approve or request changes.
+Notes:
+* Use \`commit_id\` = \`${prHeadSha}\`.
+</schema_details>
+</output_spec>
+
+<critical_constraints>
+- You MUST read and validate **every** candidate before posting anything.
+- You MUST reject false positives or unclear/vague comments.
+- You MUST write \`${reviewValidatedPath}\` before posting any comments.
+- ONLY post comments where \`status === "approved"\`—never post rejected items.
+- Preserve ordering: results must match candidate order exactly.
+</critical_constraints>
+
+
+## Phase 2: Post approved items
+- Call \`github_inline_comment___create_inline_comment\` for each \`status === "approved"\` entry
+- Call \`github_pr___submit_review\` with summary (if any approved items or meaningful assessment)
+- Do NOT approve or request changes on the review
+</execution_phases>
 `;
 }

From b89f550dedc3548395977c171c881293884e7c3a Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 27 Jan 2026 10:41:01 -0800
Subject: [PATCH 26/33] add common classes of bugs to candidate prompt

---
 .../templates/review-candidates-prompt.ts              | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 462f7b9..71a020c 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -42,6 +42,16 @@ Precomputed data files:
 - You are currently checked out to the PR branch.
 - Review ALL modified files in the PR branch.
 - Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems.
+- High-signal bug patterns to actively check for (only comment when evidenced in the diff):
+  - Null/undefined/Optional dereferences; missing-key errors on untrusted/external dict/JSON payloads
+  - Resource leaks (unclosed files/streams/connections; missing cleanup on error paths)
+  - Injection vulnerabilities (SQL injection, XSS, command/template injection) and auth/security invariant violations
+  - OAuth/CSRF invariants: state must be per-flow unpredictable and validated; avoid deterministic/predictable state or missing state checks
+  - Concurrency/race/atomicity hazards (TOCTOU, lost updates, unsafe shared state, process/thread lifecycle bugs)
+  - Missing error handling for critical operations (network, persistence, auth, migrations, external APIs)
+  - Wrong-variable/shadowing mistakes; contract mismatches (serializer/validated_data, interfaces/abstract methods)
+  - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches)
+  - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics)
 - Do NOT duplicate comments already in \`${commentsPath}\`.
 - Only flag issues you are confident about—avoid speculative or stylistic nitpicks.
 </review_guidelines>

From 2ad216426b5a2129516d4d7ad3e8b7eba1cbbb54 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 27 Jan 2026 15:37:58 -0800
Subject: [PATCH 27/33] old validator prompt

---
 .../templates/review-validator-prompt.ts      | 149 ++++++++++--------
 1 file changed, 86 insertions(+), 63 deletions(-)

diff --git a/src/create-prompt/templates/review-validator-prompt.ts b/src/create-prompt/templates/review-validator-prompt.ts
index 1883904..38561cf 100644
--- a/src/create-prompt/templates/review-validator-prompt.ts
+++ b/src/create-prompt/templates/review-validator-prompt.ts
@@ -25,44 +25,85 @@ export function generateReviewValidatorPrompt(context: PreparedContext): string
     process.env.REVIEW_VALIDATED_PATH ??
     "$RUNNER_TEMP/droid-prompts/review_validated.json";
 
-  return `You are a senior staff software engineer and expert code reviewer.
-Your task: Act as a validator agent by reviewing candidate review comments for PR #${prNumber} in ${repoFullName}. Your primary objective is to identify and filter out false positives and unclear/vague comments. For each candidate, compare the comment against the codebase and related diff, and mark each as "approved" (if valid, clear, and actionable) or "rejected" (if it is a false positive, unclear/vague, stylistic, or a duplicate). Write your validation results to \`${reviewValidatedPath}\`, and submit only approved comments to GitHub.
+  return `You are validating candidate review comments for PR #${prNumber} in ${repoFullName}.
 
-<context>
-Repo: ${repoFullName}
-PR Number: ${prNumber}
-PR Head Ref: ${prHeadRef}
-PR Head SHA: ${prHeadSha}
-PR Base Ref: ${prBaseRef}
+IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline.
 
-Input files:
-- Comment Candidates: \`${reviewCandidatesPath}\`
-- Full PR Diff: \`${diffPath}\`
-- Existing Comments on the PR: \`${commentsPath}\`
+### Context
 
-Output file:
-- Validated results: \`${reviewValidatedPath}\`
-</context>
+* Repo: ${repoFullName}
+* PR Number: ${prNumber}
+* PR Head Ref: ${prHeadRef}
+* PR Head SHA: ${prHeadSha}
+* PR Base Ref: ${prBaseRef}
 
+### Inputs
 
-<execution_phases>
-## Phase 1: Validate Comment Candidates
+Read:
+* Candidates: \`${reviewCandidatesPath}\`
+* Full PR Diff: \`${diffPath}\`
+* Existing Comments: \`${commentsPath}\`
 
-<validation_criteria>
-- You are currently checked out to the PR branch.
-- For every candidate comment in \`${reviewCandidatesPath}\`, compare it against the codebase and PR diff to determine its validity and clarity.
+### Outputs
 
-**Reject** any candidate that meets the following criteria:
-- False positive: Not an actual issue when matched against the codebase context.
-- Unclear or vague: Lacks sufficient detail to be actionable.
-- Stylistic / naming / formatting only.
-- Already reported (duplicate of existing comment).
-- For valid and clear candidates, mark as "approved"; for others, mark as "rejected" with a concise reason (e.g., "false positive", "unclear/vague", etc.).
-</validation_criteria>
+1) Write validated results to: \`${reviewValidatedPath}\`
+2) Post ONLY the approved inline comments to the PR
+3) Submit a PR review summary (if applicable)
 
+=======================
 
-<output_spec>
-Write to \`${reviewValidatedPath}\` using this exact schema:
+## CRITICAL REQUIREMENTS
+
+1. You MUST read and validate **every** candidate before posting anything.
+2. For each candidate, confirm:
+   * It is a real, actionable bug (not speculative)
+   * There is a realistic trigger path and observable wrong behavior
+   * The anchor is valid (path + side + line/startLine correspond to the diff)
+3. **Posting rule (STRICT):**
+   * Only post comments where \`status === "approved"\`.
+   * Never post rejected items.
+4. Preserve ordering: keep results in the same order as candidates.
+
+=======================
+
+## Phase 1: Load context (REQUIRED)
+
+1. Read existing comments:
+   Read \`${commentsPath}\`
+
+2. Read the COMPLETE diff:
+   Read \`${diffPath}\`
+   If large, read in chunks (offset/limit). **Do not proceed until you have read the ENTIRE diff.**
+
+3. Read candidates:
+   Read \`${reviewCandidatesPath}\`
+
+=======================
+
+## Phase 2: Validate candidates
+
+Apply the same Reporting Gate as review:
+
+### Approve ONLY if at least one is true
+* Definite runtime failure
+* Incorrect logic with a concrete trigger path and wrong outcome
+* Security vulnerability with realistic exploit
+* Data corruption/loss
+* Breaking contract change (discoverable in code/tests)
+
+Reject if:
+* It's speculative / "might" without a concrete trigger
+* It's stylistic / naming / formatting
+* It's not anchored to a valid changed line
+* It's already reported (dedupe against existing comments)
+
+When rejecting, write a concise reason.
+
+=======================
+
+## Phase 3: Write review_validated.json (REQUIRED)
+
+Write \`${reviewValidatedPath}\` with this schema:
 
 \`\`\`json
 {
@@ -101,47 +142,29 @@ Write to \`${reviewValidatedPath}\` using this exact schema:
   ],
   "reviewSummary": {
     "status": "approved",
-    "body": "1-3 sentence overall assessment"
+    "body": "1–3 sentence overall assessment"
   }
 }
 \`\`\`
 
-<schema_details>
-- **version**: Always \`1\`
+Notes:
+* Use \`commit_id\` = \`${prHeadSha}\`.
+* \`results\` MUST have exactly one entry per candidate, in the same order.
+
+Then write the file using the local file tool.
 
-- **meta**: Metadata object
-  - \`repo\`: "${repoFullName}"
-  - \`prNumber\`: ${prNumber}
-  - \`headSha\`: "${prHeadSha}"
-  - \`baseRef\`: "${prBaseRef}"
-  - \`validatedAt\`: ISO 8601 timestamp
+Tooling note:
+* If the tools list includes \`ApplyPatch\` (common for OpenAI models like GPT-5.2), use \`ApplyPatch\` to create/update the file at the exact path.
+* Otherwise, use \`Create\` (or \`Edit\` if overwriting) to write the file.
 
-- **results**: Array with exactly one entry per candidate (same order)
-  - For approved: \`{ "status": "approved", "comment": {...} }\`
-  - For rejected: \`{ "status": "rejected", "candidate": {...}, "reason": "..." }\`
+=======================
 
-- **reviewSummary**:
-  - \`status\`: "approved" or "rejected"
-  - \`body\`: 1-3 sentence overall assessment
+## Phase 4: Post approved items
 
-Notes:
-* Use \`commit_id\` = \`${prHeadSha}\`.
-</schema_details>
-</output_spec>
-
-<critical_constraints>
-- You MUST read and validate **every** candidate before posting anything.
-- You MUST reject false positives or unclear/vague comments.
-- You MUST write \`${reviewValidatedPath}\` before posting any comments.
-- ONLY post comments where \`status === "approved"\`—never post rejected items.
-- Preserve ordering: results must match candidate order exactly.
-</critical_constraints>
-
-
-## Phase 2: Post approved items
-- Call \`github_inline_comment___create_inline_comment\` for each \`status === "approved"\` entry
-- Call \`github_pr___submit_review\` with summary (if any approved items or meaningful assessment)
-- Do NOT approve or request changes on the review
-</execution_phases>
+After writing \`${reviewValidatedPath}\`, post comments ONLY for \`status === "approved"\`:
+
+* For each approved entry, call \`github_inline_comment___create_inline_comment\` with the \`comment\` object.
+* Submit a review via \`github_pr___submit_review\` using the summary body (if there are any approved items OR a meaningful overall assessment).
+* Do not approve or request changes.
 `;
 }

From 00e3ea98a6976e96dbd48b3434b07614f14c642e Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 27 Jan 2026 18:26:10 -0800
Subject: [PATCH 28/33] Parallel Review: Phase 1 - Add file-group-reviewer
 subagent

---
 .factory/droids/file-group-reviewer.md | 74 ++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 .factory/droids/file-group-reviewer.md

diff --git a/.factory/droids/file-group-reviewer.md b/.factory/droids/file-group-reviewer.md
new file mode 100644
index 0000000..7ba489b
--- /dev/null
+++ b/.factory/droids/file-group-reviewer.md
@@ -0,0 +1,74 @@
+---
+name: file-group-reviewer
+description: Reviews an assigned subset of PR files for bugs, security issues, and correctness problems. Spawned in parallel by the main review agent to ensure thorough coverage.
+model: inherit
+tools: ["Read", "Grep", "Glob", "LS"]
+---
+
+You are a senior staff software engineer and expert code reviewer.
+
+Your task: Review the assigned files from the PR and generate a JSON array of **high-confidence, actionable** review comments that pinpoint genuine issues.
+
+<review_guidelines>
+- You are currently checked out to the PR branch.
+- Review ALL files assigned to you thoroughly.
+- Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems.
+- High-signal bug patterns to actively check for (only comment when evidenced in the diff):
+  - Null/undefined/Optional dereferences; missing-key errors on untrusted/external dict/JSON payloads
+  - Resource leaks (unclosed files/streams/connections; missing cleanup on error paths)
+  - Injection vulnerabilities (SQL injection, XSS, command/template injection) and auth/security invariant violations
+  - OAuth/CSRF invariants: state must be per-flow unpredictable and validated; avoid deterministic/predictable state or missing state checks
+  - Concurrency/race/atomicity hazards (TOCTOU, lost updates, unsafe shared state, process/thread lifecycle bugs)
+  - Missing error handling for critical operations (network, persistence, auth, migrations, external APIs)
+  - Wrong-variable/shadowing mistakes; contract mismatches (serializer/validated_data, interfaces/abstract methods)
+  - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches)
+  - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics)
+- Only flag issues you are confident about—avoid speculative or stylistic nitpicks.
+</review_guidelines>
+
+<workflow>
+1. Read each assigned file in full to understand the context
+2. Read the relevant diff sections provided in the prompt
+3. Read related files as needed to fully understand the changes:
+   - Imported modules and dependencies
+   - Interfaces, base classes, and type definitions
+   - Related tests to understand expected behavior
+   - Callers/callees of modified functions
+   - Configuration files if behavior depends on them
+4. Analyze the changes for issues matching the bug patterns above
+5. For each issue found, verify it against the actual code and related context before including it
+</workflow>
+
+<output_format>
+Return your findings as a JSON array (no wrapper object, just the array):
+
+```json
+[
+  {
+    "path": "src/index.ts",
+    "body": "[P1] Title\n\n1 paragraph explanation.",
+    "line": 42,
+    "startLine": null,
+    "side": "RIGHT"
+  }
+]
+```
+
+If no issues found, return an empty array: `[]`
+
+Field definitions:
+- `path`: Relative file path (must match exactly as provided in your assignment)
+- `body`: Comment text starting with priority tag [P0|P1|P2], then title, then 1 paragraph explanation
+  - P0: Critical bugs (crashes, security vulnerabilities, data loss)
+  - P1: Important bugs (incorrect behavior, logic errors)
+  - P2: Minor bugs (edge cases, non-critical issues)
+- `line`: Target line number (single-line) or end line number (multi-line). Must be ≥ 0.
+- `startLine`: `null` for single-line comments, or start line number for multi-line comments
+- `side`: "RIGHT" for new/modified code (default), "LEFT" only for commenting on removed code
+</output_format>
+
+<constraints>
+- Output ONLY the JSON array—no additional commentary or markdown formatting around it.
+- Do not include `commit_id` in your output—the parent agent will add this.
+- Do not attempt to post comments to GitHub—just return the JSON array.
+</constraints>

From 925aeb6714421598d744d18ec6f74cee28442103 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Tue, 27 Jan 2026 18:56:55 -0800
Subject: [PATCH 29/33] Parallel Review: Phase 2 - Add Triage Phase section in
 Candidates Prompt

---
 .../templates/review-candidates-prompt.ts     | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 71a020c..2cdb701 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -56,6 +56,29 @@ Precomputed data files:
 - Only flag issues you are confident about—avoid speculative or stylistic nitpicks.
 </review_guidelines>
 
+<triage_phase>
+**Step 1: Analyze and group the modified files**
+
+Before reviewing, you must triage the PR to enable parallel review:
+
+1. Read the diff file (\`${diffPath}\`) to identify ALL modified files
+2. Group the files into logical clusters based on:
+   - **Related functionality**: Files in the same module or feature area
+   - **File relationships**: A component and its tests, a class and its interface
+   - **Risk profile**: Security-sensitive files together, database/migration files together
+   - **Dependencies**: Files that import each other or share types
+
+3. Document your grouping briefly, for example:
+   - Group 1 (Auth): src/auth/login.ts, src/auth/session.ts, tests/auth.test.ts
+   - Group 2 (API handlers): src/api/users.ts, src/api/orders.ts
+   - Group 3 (Database): src/db/migrations/001.ts, src/db/schema.ts
+
+Guidelines for grouping:
+- Aim for 3-6 groups to balance parallelism with context coherence
+- Keep related files together so reviewers have full context
+- Each group should be reviewable independently
+</triage_phase>
+
 <output_spec>
 Write output to \`${reviewCandidatesPath}\` using this exact schema:
 

From fb7b9ae6a0d311179fe44d7ebb9c05f3f0e1ef00 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 28 Jan 2026 09:58:22 -0800
Subject: [PATCH 30/33] Parallel Review: Phase 3 - Add parallel subagent calls
 phase

---
 .../templates/review-candidates-prompt.ts     | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 2cdb701..650cab1 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -79,6 +79,50 @@ Guidelines for grouping:
 - Each group should be reviewable independently
 </triage_phase>
 
+<parallel_review_phase>
+**Step 2: Spawn parallel subagents to review each group**
+
+After grouping, use the Task tool to spawn parallel \`file-group-reviewer\` subagents. Each subagent will review one group of files independently.
+
+**IMPORTANT**: Spawn ALL subagents in a single response to enable parallel execution.
+
+For each group, invoke the Task tool with:
+- \`subagent_type\`: "file-group-reviewer"
+- \`description\`: Brief label (e.g., "Review auth module")
+- \`prompt\`: Must include:
+  1. The PR context (repo, PR number, base/head refs)
+  2. The list of assigned files for this group
+  3. The relevant diff sections for those files (extract from \`${diffPath}\`)
+  4. Instructions to return a JSON array of findings
+
+Example Task invocation for one group:
+\`\`\`
+Task(
+  subagent_type: "file-group-reviewer",
+  description: "Review auth module",
+  prompt: """
+    Review the following files from PR #${prNumber} in ${repoFullName}.
+    
+    PR Context:
+    - Head SHA: ${prHeadSha}
+    - Base Ref: ${prBaseRef}
+    
+    Assigned files:
+    - src/auth/login.ts
+    - src/auth/session.ts
+    - tests/auth.test.ts
+    
+    Diff for these files:
+    <paste relevant diff sections here>
+    
+    Return a JSON array of issues found. If no issues, return [].
+  """
+)
+\`\`\`
+
+Spawn all group reviewers in parallel by including multiple Task calls in one response.
+</parallel_review_phase>
+
 <output_spec>
 Write output to \`${reviewCandidatesPath}\` using this exact schema:
 

From 11816e27620bb9db628c3d918122fe2fbe4ad575 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 28 Jan 2026 10:02:21 -0800
Subject: [PATCH 31/33] Parallel Review: Phase 4 - Aggregation Phase

---
 .../templates/review-candidates-prompt.ts         | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
index 650cab1..5e5d9ae 100644
--- a/src/create-prompt/templates/review-candidates-prompt.ts
+++ b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -123,6 +123,21 @@ Task(
 Spawn all group reviewers in parallel by including multiple Task calls in one response.
 </parallel_review_phase>
 
+<aggregation_phase>
+**Step 3: Aggregate subagent results**
+
+After all subagents complete, collect and merge their findings:
+
+1. **Collect results**: Each subagent returns a JSON array of comment objects
+2. **Merge arrays**: Combine all arrays into a single comments array
+3. **Add commit_id**: Add \`"commit_id": "${prHeadSha}"\` to each comment object
+4. **Deduplicate**: If multiple subagents flagged the same location (same path + line), keep only one comment (prefer higher priority: P0 > P1 > P2)
+5. **Filter existing**: Remove any comments that duplicate issues already in \`${commentsPath}\`
+6. **Write reviewSummary**: Synthesize a 1-3 sentence overall assessment based on all findings
+
+Write the final aggregated result to \`${reviewCandidatesPath}\` using the schema in \`<output_spec>\`.
+</aggregation_phase>
+
 <output_spec>
 Write output to \`${reviewCandidatesPath}\` using this exact schema:
 

From 7f727560c310143655c6c054f2fd271f4e1e9800 Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 28 Jan 2026 14:15:05 -0800
Subject: [PATCH 32/33] Parallel Review: Phase 5 - Move subagent to
 ~/.factory/droids

---
 action.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/action.yml b/action.yml
index 390bde6..378776f 100644
--- a/action.yml
+++ b/action.yml
@@ -184,6 +184,20 @@ runs:
         DROID_DIR=$(dirname "${{ inputs.path_to_droid_executable }}")
         echo "$DROID_DIR" >> "$GITHUB_PATH"
 
+    - name: Setup Custom Droids
+      if: steps.prepare.outputs.contains_trigger == 'true'
+      shell: bash
+      run: |
+        echo "Setting up custom droids..."
+        mkdir -p ~/.factory/droids
+        if [ -d "${GITHUB_ACTION_PATH}/.factory/droids" ]; then
+          cp -r ${GITHUB_ACTION_PATH}/.factory/droids/* ~/.factory/droids/
+          echo "Copied custom droids to ~/.factory/droids/"
+          ls -la ~/.factory/droids/
+        else
+          echo "No custom droids found in action"
+        fi
+
     - name: Setup Network Restrictions
       if: steps.prepare.outputs.contains_trigger == 'true' && inputs.experimental_allowed_domains != ''
       shell: bash
@@ -290,6 +304,7 @@ runs:
           ~/.factory/logs/droid-log-single.log
           ~/.factory/logs/console.log
           ~/.factory/sessions/*
+          ~/.factory/droids/*
           ${{ runner.temp }}/droid-prompts/**
         if-no-files-found: ignore
         retention-days: 7

From 355bbe3c6deda334dc6754a4d8aea0ffd4d36d1d Mon Sep 17 00:00:00 2001
From: User <varin@factory.ai>
Date: Wed, 28 Jan 2026 15:14:42 -0800
Subject: [PATCH 33/33] enable task tool

---
 src/tag/commands/review.ts | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts
index 569405f..d4d1a16 100644
--- a/src/tag/commands/review.ts
+++ b/src/tag/commands/review.ts
@@ -210,6 +210,9 @@ export async function prepareReviewMode({
     "github_comment___update_droid_comment",
   ];
 
+  // Task tool is needed for parallel subagent reviews in candidate generation phase
+  const candidateGenerationTools = reviewUseValidator ? ["Task"] : [];
+
   const reviewTools = reviewUseValidator
     ? []
     : [
@@ -232,7 +235,12 @@ export async function prepareReviewMode({
     : userAllowedMCPTools;
 
   const allowedTools = Array.from(
-    new Set([...baseTools, ...reviewTools, ...safeUserAllowedMCPTools]),
+    new Set([
+      ...baseTools,
+      ...candidateGenerationTools,
+      ...reviewTools,
+      ...safeUserAllowedMCPTools,
+    ]),
   );
 
   const mcpTools = await prepareMcpTools({