Skip to content

Instantly share code, notes, and snippets.

@roninjin10
Created March 9, 2026 23:44
Show Gist options
  • Select an option

  • Save roninjin10/a8b6e66b314b7bd3056e941e848dbda3 to your computer and use it in GitHub Desktop.

Select an option

Save roninjin10/a8b6e66b314b7bd3056e941e848dbda3 to your computer and use it in GitHub Desktop.
Token burning review
// .jjhub/workflows/bug-review.tsx
//
// InfiniteTokenBurnReview — parallel bug-hunting agents that loop until
// two consecutive rounds find nothing. Optionally fixes bugs inline.
//
// Usage (CI):
// Triggered on landing request opened / ready-to-land
// Also available via manual dispatch with fix-immediately toggle
//
import { z } from "zod";
import {
createSmithers,
Sequence,
Parallel,
Ralph,
ClaudeCodeAgent,
CodexAgent,
GeminiAgent,
runWorkflow,
} from "smithers-orchestrator";
import { Workflow, Task, on } from "@jjhub/workflow";
// ---------------------------------------------------------------------------
// Schemas
// ---------------------------------------------------------------------------
// Shape of a single bug report. Reused below for the hunters' `bugs` array,
// the verifier's `confirmed` array, and (extended with `rank`) the ranked list.
const bugSchema = z.object({
// Path of the file the report points at.
file: z.string(),
// Line number cited by the report.
line: z.number(),
// One of four fixed levels; the ranking step sorts on this value.
severity: z.enum(["critical", "high", "medium", "low"]),
// Free-form category label (any string is accepted).
category: z.string(),
// Explanation of the issue.
description: z.string(),
// Code excerpt backing the report.
snippet: z.string(),
});
// Declares one zod schema per output kind and obtains `smithers` (used at the
// bottom of the file to build the workflow) and `outputs` (passed to each
// Task's `output` prop). The keys here are the names later passed as the first
// argument of ctx.latest / ctx.outputMaybe / ctx.iterationCount.
const { smithers, outputs } = createSmithers({
// Written by the "plan-focuses" task: one entry per hunting agent.
focuses: z.object({
assignments: z.array(
z.object({
agent: z.string(),
focus: z.string(),
searchPatterns: z.array(z.string()),
}),
),
}),
// Written by each of the "hunt-*" tasks.
hunt: z.object({
agent: z.string(),
focus: z.string(),
bugs: z.array(bugSchema),
filesSearched: z.number(),
}),
// Written by the "verify-bugs" task: reports split into confirmed / rejected.
verify: z.object({
confirmed: z.array(bugSchema),
falsePositives: z.array(z.object({ description: z.string(), reason: z.string() })),
}),
// Written by the "rank-bugs" task; `totalConfirmed` drives loop termination.
ranked: z.object({
bugs: z.array(bugSchema.extend({ rank: z.number() })),
summary: z.string(),
totalFound: z.number(),
totalConfirmed: z.number(),
}),
// Written by the optional "fix-bugs" task.
fix: z.object({
bugsFixed: z.array(z.object({ description: z.string(), file: z.string(), fix: z.string() })),
filesChanged: z.array(z.string()),
testsRun: z.boolean(),
}),
});
// ---------------------------------------------------------------------------
// Agents — each hunts with a different lens
// ---------------------------------------------------------------------------
// Common preamble interpolated at the top of every agent's system prompt in
// this file (planner, hunters, verifier, fixer).
const SYSTEM_BASE = `You are a senior security/reliability engineer for JJHub, a jj-native code hosting platform.
The codebase is Go (Chi router, sqlc, pgxpool) + Rust (clap, jj-lib).
Read docs/specs/engineering.md for architecture. Read actual source files before reporting bugs.
IMPORTANT: Only report REAL bugs you can prove with code evidence. No speculative issues.`;
// Planning agent used by the "plan-focuses" task: it is prompted to give each
// hunter a distinct focus area.
const focusPlanner = new ClaudeCodeAgent({
id: "focus-planner",
model: "claude-opus-4-6",
systemPrompt: `${SYSTEM_BASE}
Your job is to plan the bug hunt. Analyze the codebase structure and assign each hunting agent
a distinct focus area so they don't duplicate work. Consider: security, logic errors, concurrency,
error handling, data validation, resource leaks, and edge cases.`,
});
// All three hunters receive exactly the same instructions; only the agent
// class, id, and model differ. The prompt is therefore built once and shared.
const HUNTER_SYSTEM_PROMPT = `${SYSTEM_BASE}
Search methodically. Read files, grep for patterns, trace data flows.
For each bug: cite the exact file, line, and a code snippet proving the issue.
If you find nothing, return an empty bugs array — do NOT fabricate issues.`;

// Hunter used by the "hunt-claude" task.
const claudeHunter = new ClaudeCodeAgent({
  id: "claude-hunter",
  model: "claude-opus-4-6",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});

// Hunter used by the "hunt-codex" task.
const codexHunter = new CodexAgent({
  id: "codex-hunter",
  model: "gpt-5.4",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});

// Hunter used by the "hunt-gemini" task.
const geminiHunter = new GeminiAgent({
  id: "gemini-hunter",
  model: "gemini-3.1-pro",
  systemPrompt: HUNTER_SYSTEM_PROMPT,
});
// Agent used by the "verify-bugs" task: prompted to re-read the cited source
// and either confirm each report or reject it with a reason.
const verifier = new ClaudeCodeAgent({
id: "verifier",
model: "claude-opus-4-6",
systemPrompt: `${SYSTEM_BASE}
You are the verification gate. For every reported bug, read the actual source file and confirm
the bug is real. Reject false positives with a clear reason. Be strict — only confirm bugs
you can independently verify by reading the code.`,
});
// Agent used by the optional "fix-bugs" task: prompted to apply minimal fixes
// and run tests, without refactoring or adding comments.
const fixer = new ClaudeCodeAgent({
id: "fixer",
model: "claude-opus-4-6",
systemPrompt: `${SYSTEM_BASE}
Fix the confirmed bugs. Make minimal, surgical changes. Run tests after each fix.
Do not refactor surrounding code. Do not add comments. Just fix the bug.`,
});
// ---------------------------------------------------------------------------
// InfiniteTokenBurnReview component
// ---------------------------------------------------------------------------
// Type of the context object: the first parameter of the callback that
// `smithers(...)` accepts, derived from `smithers` itself rather than imported.
type Ctx = Parameters<Parameters<typeof smithers>[0]>[0];
/** A single bug report, as validated by `bugSchema`. */
type Bug = z.infer<typeof bugSchema>;

/** One planner assignment: which hunter looks for what, and where to start. */
type FocusAssignment = {
  agent: string;
  focus: string;
  searchPatterns: string[];
};

/** Sort weight per severity (lower sorts first). Unknown values fall back to 4. */
const SEVERITY_WEIGHT: Record<string, number> = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3,
};

/**
 * Builds the prompt for one hunting agent.
 *
 * @param assignment - The planner's assignment for this agent, if one exists.
 * @param defaultFocus - Focus text used when the planner produced no assignment.
 * @param previousBugsSummary - Trailing "do not re-report" context (may be "").
 * @returns The full prompt text for the hunt task.
 */
function buildHuntPrompt(
  assignment: FocusAssignment | undefined,
  defaultFocus: string,
  previousBugsSummary: string,
): string {
  const focus = assignment?.focus ?? defaultFocus;
  const searchPatterns = assignment?.searchPatterns ?? ["*.go", "*.rs"];
  // Template content stays at column 0 so the emitted prompt is unchanged.
  return `Hunt for bugs in the JJHub codebase.
YOUR ASSIGNED FOCUS: ${focus}
SUGGESTED SEARCH PATTERNS: ${JSON.stringify(searchPatterns)}
Search the codebase thoroughly within your focus area. Read the actual source files.
For every bug you find, include the exact file path, line number, a severity rating,
a category, a description, and a verbatim code snippet showing the issue.
Return an empty bugs array if you find nothing — honesty is more valuable than volume.
${previousBugsSummary}`;
}

/**
 * Orders confirmed bugs by severity and produces the `ranked` output record.
 *
 * @param confirmed - Bugs confirmed in the current round (not mutated).
 * @param totalFound - Number of raw reports the hunters produced this round.
 * @returns An object matching the `ranked` schema. An empty `confirmed` list
 *   yields `totalConfirmed: 0` and the "No confirmed bugs this round." summary.
 */
function rankConfirmedBugs(confirmed: readonly Bug[], totalFound: number) {
  const ranked = [...confirmed]
    .sort(
      (a, b) =>
        (SEVERITY_WEIGHT[a.severity] ?? 4) - (SEVERITY_WEIGHT[b.severity] ?? 4),
    )
    .map((bug, i) => ({ ...bug, rank: i + 1 }));
  const summary = ranked
    .map(
      (b) => `#${b.rank} [${b.severity}] ${b.file}:${b.line} — ${b.description}`,
    )
    .join("\n");
  return {
    bugs: ranked,
    summary: summary || "No confirmed bugs this round.",
    totalFound,
    totalConfirmed: confirmed.length,
  };
}

/**
 * Looping bug-review pipeline: plan -> parallel hunt -> verify -> rank
 * (-> optional fix). The loop is asked to stop once the two most recent
 * `ranked` records both report zero confirmed bugs, or after 50 iterations.
 *
 * @param fixImmediately - When true, a "fix-bugs" task is appended to each round.
 * @param ctx - Workflow context used to read outputs of earlier tasks/rounds.
 */
function InfiniteTokenBurnReview({
  fixImmediately = false,
  ctx,
}: {
  fixImmediately?: boolean;
  ctx: Ctx;
}) {
  // Track consecutive empty rounds for the termination condition.
  const latestRanked = ctx.latest?.("ranked", "rank-bugs");
  // `?? 0` keeps the comparison well-defined when iterationCount is unavailable.
  const rankedRounds = ctx.iterationCount?.("ranked", "rank-bugs") ?? 0;
  // NOTE(review): this index assumes `ctx.iteration - 2` addresses the round
  // immediately before the one `latestRanked` came from — confirm against the
  // orchestrator's iteration numbering and when `until` is evaluated.
  const prevRanked =
    rankedRounds >= 2
      ? ctx.outputMaybe?.("ranked", {
          nodeId: "rank-bugs",
          iteration: (ctx.iteration ?? 1) - 2,
        })
      : undefined;
  const currentEmpty = latestRanked?.totalConfirmed === 0;
  const previousEmpty = prevRanked?.totalConfirmed === 0;
  const twoConsecutiveEmpty = currentEmpty && previousEmpty;

  // Get focus assignments for this round.
  const focuses = ctx.latest?.("focuses", "plan-focuses");
  const assignments = focuses?.assignments ?? [];
  const focusFor = (agent: string) =>
    assignments.find((a: FocusAssignment) => a.agent === agent);
  const claudeFocus = focusFor("claude");
  const codexFocus = focusFor("codex");
  const geminiFocus = focusFor("gemini");

  // Gather all bugs from this round's hunters.
  const claudeHunt = ctx.latest?.("hunt", "hunt-claude");
  const codexHunt = ctx.latest?.("hunt", "hunt-codex");
  const geminiHunt = ctx.latest?.("hunt", "hunt-gemini");
  const allBugs = [
    ...(claudeHunt?.bugs ?? []),
    ...(codexHunt?.bugs ?? []),
    ...(geminiHunt?.bugs ?? []),
  ];

  const verified = ctx.latest?.("verify", "verify-bugs");
  // "verify-bugs" is skipped when nothing was reported, in which case
  // `verified` still holds an earlier round's result. Treat such a round as
  // having no confirmed bugs instead of re-using the stale confirmation list.
  const confirmedThisRound =
    allBugs.length === 0 ? [] : (verified?.confirmed ?? []);

  // Previous round context for agents to avoid re-reporting.
  const previousBugsSummary = latestRanked
    ? `\nPrevious round found ${latestRanked.totalConfirmed} confirmed bugs:\n${latestRanked.summary}\n\nDo NOT re-report these. Find NEW bugs only.`
    : "";

  return (
    <Ralph
      until={twoConsecutiveEmpty}
      maxIterations={50}
      onMaxReached="return-last"
    >
      <Sequence>
        {/* Phase 1: Plan focused search areas for each agent */}
        <Task id="plan-focuses" output={outputs.focuses} agent={focusPlanner}>
          {`Plan the bug hunt for this round. Assign each of the 3 hunting agents (claude, codex, gemini)
a distinct focus area so they search different parts of the codebase.
Consider these dimensions:
- Security (injection, auth bypass, SSRF, path traversal)
- Logic errors (off-by-one, nil derefs, wrong comparisons)
- Concurrency (race conditions, deadlocks, missing locks)
- Error handling (swallowed errors, wrong status codes, missing rollbacks)
- Data validation (missing bounds checks, type confusion, overflow)
- Resource leaks (unclosed connections, goroutine leaks, file handles)
For each agent, provide:
- agent: "claude" | "codex" | "gemini"
- focus: a 1-sentence description of what to hunt for
- searchPatterns: grep patterns and file globs to start with
${previousBugsSummary}`}
        </Task>
        {/* Phase 2: Three agents hunt in parallel with assigned focuses */}
        <Parallel>
          <Task id="hunt-claude" output={outputs.hunt} agent={claudeHunter} retries={2}>
            {buildHuntPrompt(
              claudeFocus,
              "Security vulnerabilities and injection flaws",
              previousBugsSummary,
            )}
          </Task>
          <Task id="hunt-codex" output={outputs.hunt} agent={codexHunter} retries={2}>
            {buildHuntPrompt(
              codexFocus,
              "Logic errors and edge cases",
              previousBugsSummary,
            )}
          </Task>
          <Task id="hunt-gemini" output={outputs.hunt} agent={geminiHunter} retries={2}>
            {buildHuntPrompt(
              geminiFocus,
              "Error handling and resource management",
              previousBugsSummary,
            )}
          </Task>
        </Parallel>
        {/* Phase 3: Verify — independent agent reads source to confirm/reject */}
        <Task
          id="verify-bugs"
          output={outputs.verify}
          agent={verifier}
          retries={2}
          skipIf={allBugs.length === 0}
        >
          {`Verify the following ${allBugs.length} bug reports by reading the actual source code.
For each bug, open the cited file and line, read the surrounding context, and determine
if the bug is real or a false positive.
Bug reports to verify:
${JSON.stringify(allBugs, null, 2)}
Be strict. Only confirm bugs where you can independently see the problem in the code.
Reject anything speculative, already handled, or based on misreading the code.`}
        </Task>
        {/* Phase 4: Rank confirmed bugs by severity.
            Deliberately NOT skipped on empty rounds: the termination check
            above needs a `ranked` record with totalConfirmed === 0 for every
            round in which nothing was found. */}
        <Task id="rank-bugs" output={outputs.ranked}>
          {() => rankConfirmedBugs(confirmedThisRound, allBugs.length)}
        </Task>
        {/* Phase 5 (optional): Fix confirmed bugs before next round */}
        {fixImmediately ? (
          <Task
            id="fix-bugs"
            output={outputs.fix}
            agent={fixer}
            retries={2}
            skipIf={confirmedThisRound.length === 0}
          >
            {`Fix the following confirmed bugs. Make minimal, surgical changes only.
Bugs to fix (ranked by severity):
${JSON.stringify(latestRanked?.bugs ?? [], null, 2)}
Rules:
- Fix one bug at a time, verify each fix compiles
- Run \`go vet ./...\` and \`cargo check\` after changes
- Do NOT refactor surrounding code
- Do NOT add comments or documentation
- If a fix requires a test, add the minimal test case`}
          </Task>
        ) : null}
      </Sequence>
    </Ralph>
  );
}
// ---------------------------------------------------------------------------
// Workflow
// ---------------------------------------------------------------------------
// Top-level workflow: wires the triggers to the review loop and forwards the
// "fix-immediately" input (defaulting to false when absent).
const workflow = smithers((ctx) => {
  const fixImmediately = ctx.input?.["fix-immediately"] ?? false;
  const triggers = [
    on.landingRequest.opened(),
    on.landingRequest.readyToLand(),
    on.manualDispatch({ "fix-immediately": false }),
  ];

  return (
    <Workflow name="Bug Review" triggers={triggers}>
      <InfiniteTokenBurnReview fixImmediately={fixImmediately} ctx={ctx} />
    </Workflow>
  );
});

export default workflow;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment