Created
March 9, 2026 23:44
-
-
Save roninjin10/a8b6e66b314b7bd3056e941e848dbda3 to your computer and use it in GitHub Desktop.
Token burning review
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // .jjhub/workflows/bug-review.tsx | |
| // | |
| // InfiniteTokenBurnReview — parallel bug-hunting agents that loop until | |
| // two consecutive rounds find nothing. Optionally fixes bugs inline. | |
| // | |
| // Usage (CI): | |
| // Triggered on landing request opened / ready-to-land | |
| // Also available via manual dispatch with fix-immediately toggle | |
| // | |
| import { z } from "zod"; | |
| import { | |
| createSmithers, | |
| Sequence, | |
| Parallel, | |
| Ralph, | |
| ClaudeCodeAgent, | |
| CodexAgent, | |
| GeminiAgent, | |
| runWorkflow, | |
| } from "smithers-orchestrator"; | |
| import { Workflow, Task, on } from "@jjhub/workflow"; | |
| // --------------------------------------------------------------------------- | |
| // Schemas | |
| // --------------------------------------------------------------------------- | |
/** Severity levels a bug report may carry, listed from most to least severe. */
const bugSeveritySchema = z.enum(["critical", "high", "medium", "low"]);

/**
 * A single bug report: where it is (`file`, `line`), how bad it is
 * (`severity`), what kind of problem it is (`category`), a prose
 * `description`, and the code `snippet` cited as evidence.
 */
const bugSchema = z.object({
  file: z.string(),
  line: z.number(),
  severity: bugSeveritySchema,
  category: z.string(),
  description: z.string(),
  snippet: z.string(),
});
/** One hunting agent's assignment: which agent, what to look for, where to start. */
const focusAssignmentSchema = z.object({
  agent: z.string(),
  focus: z.string(),
  searchPatterns: z.array(z.string()),
});

/** A reported bug that verification rejected, with the reason for rejecting it. */
const falsePositiveSchema = z.object({
  description: z.string(),
  reason: z.string(),
});

/** A confirmed bug annotated with its position in the ranking. */
const rankedBugSchema = bugSchema.extend({ rank: z.number() });

/** A record of one fix that was applied. */
const appliedFixSchema = z.object({
  description: z.string(),
  file: z.string(),
  fix: z.string(),
});

// One schema per task output used by the workflow below:
// focuses -> plan, hunt -> per-agent findings, verify -> confirmed/rejected,
// ranked -> ordered confirmed bugs, fix -> what the fixer changed.
const { smithers, outputs } = createSmithers({
  focuses: z.object({
    assignments: z.array(focusAssignmentSchema),
  }),
  hunt: z.object({
    agent: z.string(),
    focus: z.string(),
    bugs: z.array(bugSchema),
    filesSearched: z.number(),
  }),
  verify: z.object({
    confirmed: z.array(bugSchema),
    falsePositives: z.array(falsePositiveSchema),
  }),
  ranked: z.object({
    bugs: z.array(rankedBugSchema),
    summary: z.string(),
    totalFound: z.number(),
    totalConfirmed: z.number(),
  }),
  fix: z.object({
    bugsFixed: z.array(appliedFixSchema),
    filesChanged: z.array(z.string()),
    testsRun: z.boolean(),
  }),
});
| // --------------------------------------------------------------------------- | |
| // Agents — each hunts with a different lens | |
| // --------------------------------------------------------------------------- | |
/**
 * Preamble shared by every agent's system prompt: role, codebase stack,
 * where to read about the architecture, and the evidence-only rule.
 */
const SYSTEM_BASE = [
  "You are a senior security/reliability engineer for JJHub, a jj-native code hosting platform.",
  "The codebase is Go (Chi router, sqlc, pgxpool) + Rust (clap, jj-lib).",
  "Read docs/specs/engineering.md for architecture. Read actual source files before reporting bugs.",
  "IMPORTANT: Only report REAL bugs you can prove with code evidence. No speculative issues.",
].join("\n");
// System prompt for the planner: the shared preamble plus planning instructions.
const FOCUS_PLANNER_PROMPT = `${SYSTEM_BASE}
Your job is to plan the bug hunt. Analyze the codebase structure and assign each hunting agent
a distinct focus area so they don't duplicate work. Consider: security, logic errors, concurrency,
error handling, data validation, resource leaks, and edge cases.`;

/** Agent that splits the hunt into distinct focus areas, one per hunter. */
const focusPlanner = new ClaudeCodeAgent({
  id: "focus-planner",
  model: "claude-opus-4-6",
  systemPrompt: FOCUS_PLANNER_PROMPT,
});
// All three hunters receive the same system prompt; only the agent class,
// id and model differ.
const HUNTER_SYSTEM_PROMPT = `${SYSTEM_BASE}
Search methodically. Read files, grep for patterns, trace data flows.
For each bug: cite the exact file, line, and a code snippet proving the issue.
If you find nothing, return an empty bugs array — do NOT fabricate issues.`;

/** Hunter backed by ClaudeCodeAgent. */
const claudeHunter = new ClaudeCodeAgent({
  id: "claude-hunter",
  model: "claude-opus-4-6",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});

/** Hunter backed by CodexAgent. */
const codexHunter = new CodexAgent({
  id: "codex-hunter",
  model: "gpt-5.4",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});

/** Hunter backed by GeminiAgent. */
const geminiHunter = new GeminiAgent({
  id: "gemini-hunter",
  model: "gemini-3.1-pro",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});
// System prompt for the verifier: the shared preamble plus strict-confirmation rules.
const VERIFIER_PROMPT = `${SYSTEM_BASE}
You are the verification gate. For every reported bug, read the actual source file and confirm
the bug is real. Reject false positives with a clear reason. Be strict — only confirm bugs
you can independently verify by reading the code.`;

/** Agent that confirms or rejects each reported bug against the source. */
const verifier = new ClaudeCodeAgent({
  id: "verifier",
  model: "claude-opus-4-6",
  systemPrompt: VERIFIER_PROMPT,
});
// System prompt for the fixer: the shared preamble plus minimal-change rules.
const FIXER_PROMPT = `${SYSTEM_BASE}
Fix the confirmed bugs. Make minimal, surgical changes. Run tests after each fix.
Do not refactor surrounding code. Do not add comments. Just fix the bug.`;

/** Agent that applies fixes for confirmed bugs. */
const fixer = new ClaudeCodeAgent({
  id: "fixer",
  model: "claude-opus-4-6",
  systemPrompt: FIXER_PROMPT,
});
| // --------------------------------------------------------------------------- | |
| // InfiniteTokenBurnReview component | |
| // --------------------------------------------------------------------------- | |
/** The context object that `smithers` passes to its workflow builder callback. */
type Ctx = Parameters<Parameters<typeof smithers>[0]>[0];

/** A single bug report, as described by `bugSchema`. */
type Bug = z.infer<typeof bugSchema>;

/** The fields of a planner assignment that the hunt prompt reads. */
type FocusAssignment = { focus: string; searchPatterns: string[] };

// Sort key per severity; anything not listed sorts after "low".
const SEVERITY_ORDER: Record<string, number> = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3,
};

// Search patterns used when the planner gave an agent no assignment.
const DEFAULT_SEARCH_PATTERNS = ["*.go", "*.rs"];

/**
 * Orders confirmed bugs by severity and builds the `ranked` output record.
 *
 * @param confirmed  Bugs confirmed in the current round (may be empty).
 * @param totalFound Number of bug reports the hunters produced this round.
 * @returns An object with the ranked bugs, a one-line-per-bug summary, and
 *          the found/confirmed counts.
 */
function rankConfirmedBugs(confirmed: readonly Bug[], totalFound: number) {
  // Copy before sorting so the caller's array is not mutated.
  const ranked = [...confirmed]
    .sort(
      (a, b) =>
        (SEVERITY_ORDER[a.severity] ?? 4) - (SEVERITY_ORDER[b.severity] ?? 4),
    )
    .map((bug, i) => ({ ...bug, rank: i + 1 }));
  const summary = ranked
    .map(
      (b) => `#${b.rank} [${b.severity}] ${b.file}:${b.line} — ${b.description}`,
    )
    .join("\n");
  return {
    bugs: ranked,
    summary: summary || "No confirmed bugs this round.",
    totalFound,
    totalConfirmed: confirmed.length,
  };
}

/**
 * Builds the prompt given to one hunting agent.
 *
 * @param assignment          The planner's assignment for this agent, if any.
 * @param fallbackFocus       Focus text used when there is no assignment.
 * @param previousBugsSummary Text describing the previous round's confirmed
 *                            bugs, or "" when there is none.
 */
function buildHuntPrompt(
  assignment: FocusAssignment | undefined,
  fallbackFocus: string,
  previousBugsSummary: string,
): string {
  return `Hunt for bugs in the JJHub codebase.
YOUR ASSIGNED FOCUS: ${assignment?.focus ?? fallbackFocus}
SUGGESTED SEARCH PATTERNS: ${JSON.stringify(assignment?.searchPatterns ?? DEFAULT_SEARCH_PATTERNS)}
Search the codebase thoroughly within your focus area. Read the actual source files.
For every bug you find, include the exact file path, line number, a severity rating,
a category, a description, and a verbatim code snippet showing the issue.
Return an empty bugs array if you find nothing — honesty is more valuable than volume.
${previousBugsSummary}`;
}

/**
 * Looping review: plan focuses, hunt in parallel with three agents, verify,
 * rank, and optionally fix. The loop stops once the two most recently
 * compared rounds both recorded zero confirmed bugs, or after 50 iterations.
 *
 * @param fixImmediately When true, a fix task runs after ranking in each round.
 * @param ctx            The workflow context used to read earlier task outputs.
 */
function InfiniteTokenBurnReview({
  fixImmediately = false,
  ctx,
}: {
  fixImmediately?: boolean;
  ctx: Ctx;
}) {
  // Track consecutive empty rounds for the termination condition.
  const latestRanked = ctx.latest?.("ranked", "rank-bugs");
  // Default to 0 so the comparison never sees `undefined`.
  const rankedRounds = ctx.iterationCount?.("ranked", "rank-bugs") ?? 0;
  // NOTE(review): this assumes `ctx.iteration - 2` addresses the round just
  // before the one `latestRanked` came from — confirm against the
  // orchestrator's iteration numbering.
  const prevRanked =
    rankedRounds >= 2
      ? ctx.outputMaybe?.("ranked", {
          nodeId: "rank-bugs",
          iteration: (ctx.iteration ?? 1) - 2,
        })
      : undefined;
  const currentEmpty = latestRanked?.totalConfirmed === 0;
  const previousEmpty = prevRanked?.totalConfirmed === 0;
  const twoConsecutiveEmpty = currentEmpty && previousEmpty;

  // Focus assignments produced by the planner.
  const assignments = ctx.latest?.("focuses", "plan-focuses")?.assignments ?? [];
  const focusFor = (agent: string) =>
    assignments.find((a: { agent: string }) => a.agent === agent);
  const claudeFocus = focusFor("claude");
  const codexFocus = focusFor("codex");
  const geminiFocus = focusFor("gemini");

  // Gather every bug reported by the hunters.
  const allBugs = [
    ...(ctx.latest?.("hunt", "hunt-claude")?.bugs ?? []),
    ...(ctx.latest?.("hunt", "hunt-codex")?.bugs ?? []),
    ...(ctx.latest?.("hunt", "hunt-gemini")?.bugs ?? []),
  ];

  // verify-bugs is skipped when there are no reports, so the latest "verify"
  // output would then belong to an earlier round. Treat that case as
  // "nothing confirmed" instead of reusing stale confirmations.
  const verified = ctx.latest?.("verify", "verify-bugs");
  const confirmedThisRound =
    allBugs.length === 0 ? [] : (verified?.confirmed ?? []);

  // Previous round context for agents to avoid re-reporting.
  const previousBugsSummary = latestRanked
    ? `\nPrevious round found ${latestRanked.totalConfirmed} confirmed bugs:\n${latestRanked.summary}\n\nDo NOT re-report these. Find NEW bugs only.`
    : "";

  return (
    <Ralph
      until={twoConsecutiveEmpty}
      maxIterations={50}
      onMaxReached="return-last"
    >
      <Sequence>
        {/* Phase 1: Plan focused search areas for each agent */}
        <Task id="plan-focuses" output={outputs.focuses} agent={focusPlanner}>
          {`Plan the bug hunt for this round. Assign each of the 3 hunting agents (claude, codex, gemini)
a distinct focus area so they search different parts of the codebase.
Consider these dimensions:
- Security (injection, auth bypass, SSRF, path traversal)
- Logic errors (off-by-one, nil derefs, wrong comparisons)
- Concurrency (race conditions, deadlocks, missing locks)
- Error handling (swallowed errors, wrong status codes, missing rollbacks)
- Data validation (missing bounds checks, type confusion, overflow)
- Resource leaks (unclosed connections, goroutine leaks, file handles)
For each agent, provide:
- agent: "claude" | "codex" | "gemini"
- focus: a 1-sentence description of what to hunt for
- searchPatterns: grep patterns and file globs to start with
${previousBugsSummary}`}
        </Task>
        {/* Phase 2: Three agents hunt in parallel with assigned focuses */}
        <Parallel>
          <Task id="hunt-claude" output={outputs.hunt} agent={claudeHunter} retries={2}>
            {buildHuntPrompt(
              claudeFocus,
              "Security vulnerabilities and injection flaws",
              previousBugsSummary,
            )}
          </Task>
          <Task id="hunt-codex" output={outputs.hunt} agent={codexHunter} retries={2}>
            {buildHuntPrompt(
              codexFocus,
              "Logic errors and edge cases",
              previousBugsSummary,
            )}
          </Task>
          <Task id="hunt-gemini" output={outputs.hunt} agent={geminiHunter} retries={2}>
            {buildHuntPrompt(
              geminiFocus,
              "Error handling and resource management",
              previousBugsSummary,
            )}
          </Task>
        </Parallel>
        {/* Phase 3: Verify — independent agent reads source to confirm/reject */}
        <Task
          id="verify-bugs"
          output={outputs.verify}
          agent={verifier}
          retries={2}
          skipIf={allBugs.length === 0}
        >
          {`Verify the following ${allBugs.length} bug reports by reading the actual source code.
For each bug, open the cited file and line, read the surrounding context, and determine
if the bug is real or a false positive.
Bug reports to verify:
${JSON.stringify(allBugs, null, 2)}
Be strict. Only confirm bugs where you can independently see the problem in the code.
Reject anything speculative, already handled, or based on misreading the code.`}
        </Task>
        {/* Phase 4: Rank confirmed bugs by severity.
            Deliberately NOT skipped when nothing was found: every round must
            record a "ranked" output (with totalConfirmed 0 if empty), otherwise
            the loop's termination check keeps reading the last non-empty round
            and can never observe two empty rounds in a row. */}
        <Task id="rank-bugs" output={outputs.ranked}>
          {() => rankConfirmedBugs(confirmedThisRound, allBugs.length)}
        </Task>
        {/* Phase 5 (optional): Fix confirmed bugs before next round */}
        {fixImmediately ? (
          <Task
            id="fix-bugs"
            output={outputs.fix}
            agent={fixer}
            retries={2}
            skipIf={confirmedThisRound.length === 0}
          >
            {`Fix the following confirmed bugs. Make minimal, surgical changes only.
Bugs to fix (ranked by severity):
${JSON.stringify(latestRanked?.bugs ?? [], null, 2)}
Rules:
- Fix one bug at a time, verify each fix compiles
- Run \`go vet ./...\` and \`cargo check\` after changes
- Do NOT refactor surrounding code
- Do NOT add comments or documentation
- If a fix requires a test, add the minimal test case`}
          </Task>
        ) : null}
      </Sequence>
    </Ralph>
  );
}
| // --------------------------------------------------------------------------- | |
| // Workflow | |
| // --------------------------------------------------------------------------- | |
/**
 * The "Bug Review" workflow. It is triggered when a landing request is opened
 * or becomes ready to land, and by manual dispatch with a "fix-immediately"
 * input that defaults to false.
 */
const workflow = smithers((ctx) => {
  const triggers = [
    on.landingRequest.opened(),
    on.landingRequest.readyToLand(),
    on.manualDispatch({ "fix-immediately": false }),
  ];
  // Absent input means "report only, do not fix".
  const fixImmediately = ctx.input?.["fix-immediately"] ?? false;

  return (
    <Workflow name="Bug Review" triggers={triggers}>
      <InfiniteTokenBurnReview fixImmediately={fixImmediately} ctx={ctx} />
    </Workflow>
  );
});

export default workflow;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment