backnotprop · backnotprop · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/apps/marketing/src/content/docs/commands/code-review.md b/apps/marketing/src/content/docs/commands/code-review.md
@@ -106,6 +106,10 @@ When multiple providers are available, set your default in **Settings → AI**.
 
 If only one provider is installed, it's used automatically with no configuration needed.
 
+## How review agents prompt the CLI
+
+The review agents (Claude, Codex, Code Tour) shell out to external CLIs. Plannotator controls the user message and output schema; the CLI's own harness owns the system prompt. See the [Prompts reference](/docs/reference/prompts/) for the full breakdown of what each provider sends, how the pieces join, and which knobs you can tune per job.
+
 ## Submitting feedback
 
 - **Send Feedback** formats your annotations and sends them to the agent

diff --git a/apps/marketing/src/content/docs/reference/prompts.md b/apps/marketing/src/content/docs/reference/prompts.md
@@ -0,0 +1,69 @@
+---
+title: "Prompts"
+description: "How Plannotator's review agents structure their prompts, what we control, what the CLI harness owns, and how the pieces fit together."
+sidebar:
+  order: 33
+section: "Reference"
+---
+
+Plannotator's review agents (Claude, Codex, and Code Tour) all shell out to an external CLI. This page maps what those CLIs receive on every invocation: which parts Plannotator controls, and which parts are owned by the CLI's own agent harness.
+
+Importantly, **we don't invent our own review prompts for Claude or Codex**. The Claude review prompt is derived from Claude Code's published open-source review prompt, and the Codex review prompt is copied verbatim from [`codex-rs/core/review_prompt.md`](https://github.com/openai/codex). You get the same review behavior those tools ship with. Code Tour is the one exception: it's a Plannotator-original workflow, so its prompt is ours.
+
+## The three layers
+
+Every review call is shaped by three layers:
+
+1. **System prompt.** Owned by the CLI (Claude Code or codex-rs). Plannotator never sets or touches this.
+2. **User message.** What Plannotator sends. Always a single concatenated string of two parts: a static **review prompt** plus a dynamic **user prompt**.
+3. **Output schema.** A JSON schema passed to the CLI as a flag, forcing the final assistant message to match a known shape.
+
+## What's in the user message
+
+The user message Plannotator sends is always:
+
+```
+<review prompt>
+
+---
+
+<user prompt>
+```
+
+**Review prompt** is a long, static review instruction that lives in the repo as a TypeScript constant. It's distinct per provider.
+
+**User prompt** is a short, dynamic line built per call from the diff type (`uncommitted`, `staged`, `last-commit`, `branch`, PR URL, and so on). The same builder is used for all providers.
+
+## Matrix
+
+| | Claude review | Codex review | Code Tour (Claude or Codex) |
+|---|---|---|---|
+| **System prompt** | Owned by `claude` CLI. We don't touch it. | Owned by `codex` CLI. We don't touch it. | Same as whichever engine runs. |
+| **Review prompt (static, ours)** | `CLAUDE_REVIEW_PROMPT` in `packages/server/claude-review.ts` | `CODEX_REVIEW_SYSTEM_PROMPT` in `packages/server/codex-review.ts` (misnamed; it's user content) | `TOUR_REVIEW_PROMPT` in `packages/server/tour-review.ts` |
+| **User prompt (dynamic, ours)** | `buildCodexReviewUserMessage(patch, diffType, …)` (shared by all providers despite the name) | same function | same function |
+| **Full user message** | `review prompt + "\n\n---\n\n" + user prompt` | same | same |
+| **Delivered via** | stdin | last positional argv | stdin (Claude engine) or positional argv (Codex engine) |
+| **Output schema flag** | `--json-schema <inline JSON>` | `--output-schema <file path>` | same as engine |
+| **Schema shape** | severity findings (`important`, `nit`, `pre_existing`) | priority findings (P0 through P3) | stops plus QA checklist |
+
+## Why the schema matters
+
+The schema flag is a terminal constraint, not a per-turn one. The agent reasons freely across as many turns as it needs (reading files, grepping, running tests), and only the final assistant message is required to conform to the schema. Everything upstream is unconstrained exploration.
+
+That's why this pattern works for review. You get agentic exploration (the whole point of using Claude Code or Codex over a raw LLM call), plus a machine-readable payload the UI can render without any scraping.
+
+## What you can tune per job
+
+From the **Agents** tab in the code-review UI, each provider exposes these settings:
+
+| Setting | Claude | Codex | Tour |
+|---|---|---|---|
+| Model | yes (`--model`) | yes (`-m`) | yes (per engine) |
+| Reasoning effort | yes (`--effort`) | yes (`-c model_reasoning_effort=…`) | yes (per engine) |
+| Fast mode | no | yes (`-c service_tier=fast`) | Codex engine only |
+
+None of these change the review prompt or user prompt. They only change how the underlying CLI executes the same user message.
+
+## Relationship to code review
+
+See [Code Review](/docs/commands/code-review/) for the end-to-end flow this feeds into.
diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
@@ -54,15 +54,25 @@ export interface AgentJobHandlerOptions {
 	mode: "plan" | "review" | "annotate";
 	getServerUrl: () => string;
 	getCwd: () => string;
-	/** Server-side command builder for known providers (codex, claude). */
-	buildCommand?: (provider: string) => Promise<{
+	/** Server-side command builder for known providers (codex, claude, tour). */
+	buildCommand?: (provider: string, config?: Record<string, unknown>) => Promise<{
 		command: string[];
 		outputPath?: string;
 		captureStdout?: boolean;
 		stdinPrompt?: string;
 		cwd?: string;
 		prompt?: string;
 		label?: string;
+		/** Underlying engine used (e.g., "claude" or "codex"). Stored on AgentJobInfo for UI display. */
+		engine?: string;
+		/** Model used (e.g., "sonnet", "opus"). Stored on AgentJobInfo for UI display. */
+		model?: string;
+		/** Claude --effort level. */
+		effort?: string;
+		/** Codex reasoning effort level. */
+		reasoningEffort?: string;
+		/** Whether Codex fast mode was enabled. */
+		fastMode?: boolean;
 	} | null>;
 	/** Called when a job completes successfully — parse results and push annotations. */
 	onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
@@ -81,6 +91,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 	const capabilities: AgentCapability[] = [
 		{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
 		{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
+		{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
 	];
 	const capabilitiesResponse: AgentCapabilities = {
 		mode,
@@ -107,7 +118,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 		command: string[],
 		label: string,
 		outputPath?: string,
-		spawnOptions?: { captureStdout?: boolean; stdinPrompt?: string; cwd?: string; prompt?: string },
+		spawnOptions?: { captureStdout?: boolean; stdinPrompt?: string; cwd?: string; prompt?: string; engine?: string; model?: string; effort?: string; reasoningEffort?: string; fastMode?: boolean },
 	): AgentJobInfo {
 		const id = crypto.randomUUID();
 		const source = jobSource(id);
@@ -121,6 +132,11 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 			startedAt: Date.now(),
 			command,
 			cwd: getCwd(),
+			...(spawnOptions?.engine && { engine: spawnOptions.engine }),
+			...(spawnOptions?.model && { model: spawnOptions.model }),
+			...(spawnOptions?.effort && { effort: spawnOptions.effort }),
+			...(spawnOptions?.reasoningEffort && { reasoningEffort: spawnOptions.reasoningEffort }),
+			...(spawnOptions?.fastMode && { fastMode: spawnOptions.fastMode }),
 		};
 
 		let proc: ChildProcess | null = null;
@@ -169,7 +185,8 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 					const lines = text.split('\n');
 					for (const line of lines) {
 						if (!line.trim()) continue;
-						if (provider === "claude") {
+						// Tour jobs with the Claude engine also stream Claude JSONL.
+						if (provider === "claude" || spawnOptions?.engine === "claude") {
 							const formatted = formatClaudeLogEvent(line);
 							if (formatted !== null) {
 								broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
@@ -397,8 +414,20 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 					let stdinPrompt: string | undefined;
 					let spawnCwd: string | undefined;
 					let promptText: string | undefined;
+					let jobEngine: string | undefined;
+					let jobModel: string | undefined;
+					let jobEffort: string | undefined;
+					let jobReasoningEffort: string | undefined;
+					let jobFastMode: boolean | undefined;
 					if (options.buildCommand) {
-						const built = await options.buildCommand(provider);
+						// Thread config from POST body to buildCommand
+						const config: Record<string, unknown> = {};
+						if (typeof body.engine === "string") config.engine = body.engine;
+						if (typeof body.model === "string") config.model = body.model;
+						if (typeof body.reasoningEffort === "string") config.reasoningEffort = body.reasoningEffort;
+						if (typeof body.effort === "string") config.effort = body.effort;
+						if (body.fastMode === true) config.fastMode = true;
+						const built = await options.buildCommand(provider, Object.keys(config).length > 0 ? config : undefined);
 						if (built) {
 							command = built.command;
 							outputPath = built.outputPath;
@@ -407,6 +436,11 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 							spawnCwd = built.cwd;
 							promptText = built.prompt;
 							if (built.label) label = built.label;
+							jobEngine = built.engine;
+							jobModel = built.model;
+							jobEffort = built.effort;
+							jobReasoningEffort = built.reasoningEffort;
+							jobFastMode = built.fastMode;
 						}
 					}
 
@@ -420,6 +454,11 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 						stdinPrompt,
 						cwd: spawnCwd,
 						prompt: promptText,
+						engine: jobEngine,
+						model: jobModel,
+						effort: jobEffort,
+						reasoningEffort: jobReasoningEffort,
+						fastMode: jobFastMode,
 					});
 					json(res, { job }, 201);
 				} catch {

diff --git a/apps/pi-extension/server/serverReview.ts b/apps/pi-extension/server/serverReview.ts
@@ -71,6 +71,7 @@ import {
 	parseClaudeStreamOutput,
 	transformClaudeFindings,
 } from "../generated/claude-review.js";
+import { createTourSession } from "../generated/tour-review.js";
 
 /** Detect if running inside WSL (Windows Subsystem for Linux) */
 function detectWSL(): boolean {
@@ -202,12 +203,17 @@ export async function startReviewServer(options: {
 		}
 		return options.gitContext?.cwd ?? process.cwd();
 	}
+	// Tour session — shared factory encapsulates in-memory state, provider
+	// lifecycle, and route-handler helpers. See createTourSession in
+	// packages/server/tour-review.ts (vendored into generated/).
+	const tour = createTourSession();
+
 	const agentJobs = createAgentJobHandler({
 		mode: "review",
 		getServerUrl: () => serverUrl,
 		getCwd: resolveAgentCwd,
 
-		async buildCommand(provider) {
+		async buildCommand(provider, config) {
 			const cwd = resolveAgentCwd();
 			const hasAgentLocalAccess = !!options.agentCwd || !!options.gitContext;
 			const userMessage = buildCodexReviewUserMessage(
@@ -218,16 +224,25 @@ export async function startReviewServer(options: {
 			);
 
 			if (provider === "codex") {
+				const model = typeof config?.model === "string" && config.model ? config.model : undefined;
+				const reasoningEffort = typeof config?.reasoningEffort === "string" && config.reasoningEffort ? config.reasoningEffort : undefined;
+				const fastMode = config?.fastMode === true;
 				const outputPath = generateOutputPath();
 				const prompt = CODEX_REVIEW_SYSTEM_PROMPT + "\n\n---\n\n" + userMessage;
-				const command = await buildCodexCommand({ cwd, outputPath, prompt });
-				return { command, outputPath, prompt, label: "Codex Review" };
+				const command = await buildCodexCommand({ cwd, outputPath, prompt, model, reasoningEffort, fastMode });
+				return { command, outputPath, prompt, label: "Code Review", model, reasoningEffort, fastMode: fastMode || undefined };
 			}
 
 			if (provider === "claude") {
+				const model = typeof config?.model === "string" && config.model ? config.model : undefined;
+				const effort = typeof config?.effort === "string" && config.effort ? config.effort : undefined;
 				const prompt = CLAUDE_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
-				const { command, stdinPrompt } = buildClaudeCommand(prompt);
-				return { command, stdinPrompt, prompt, cwd, label: "Claude Code Review", captureStdout: true };
+				const { command, stdinPrompt } = buildClaudeCommand(prompt, model, effort);
+				return { command, stdinPrompt, prompt, cwd, label: "Code Review", captureStdout: true, model, effort };
+			}
+
+			if (provider === "tour") {
+				return tour.buildCommand({ cwd, userMessage, config });
 			}
 
 			return null;
@@ -242,7 +257,7 @@ export async function startReviewServer(options: {
 
 				// Override verdict if there are blocking findings (P0/P1) — Codex's
 				// freeform correctness string can say "mostly correct" with real bugs.
-				const hasBlockingFindings = output.findings.some((f: any) => f.priority !== null && f.priority <= 1);
+				const hasBlockingFindings = output.findings.some(f => f.priority !== null && f.priority <= 1);
 				job.summary = {
 					correctness: hasBlockingFindings ? "Issues Found" : output.overall_correctness,
 					explanation: output.overall_explanation,
@@ -259,7 +274,10 @@ export async function startReviewServer(options: {
 
 			if (job.provider === "claude" && meta.stdout) {
 				const output = parseClaudeStreamOutput(meta.stdout);
-				if (!output) return;
+				if (!output) {
+					console.error(`[claude-review] Failed to parse output (${meta.stdout.length} bytes, last 200: ${meta.stdout.slice(-200)})`);
+					return;
+				}
 
 				const total = output.summary.important + output.summary.nit + output.summary.pre_existing;
 				job.summary = {
@@ -275,6 +293,20 @@ export async function startReviewServer(options: {
 				}
 				return;
 			}
+
+			if (job.provider === "tour") {
+				const { summary } = await tour.onJobComplete({ job, meta });
+				if (summary) {
+					job.summary = summary;
+				} else {
+					// The process exited 0 but the model returned empty or malformed output
+					// and nothing was stored. Flip status so the client doesn't auto-open
+					// a successful-looking card that 404s on /api/tour/:id.
+					job.status = "failed";
+					job.error = "Tour generation returned empty or malformed output";
+				}
+				return;
+			}
 		},
 	});
 	const sharingEnabled =
@@ -412,6 +444,31 @@ export async function startReviewServer(options: {
 	const server = createServer(async (req, res) => {
 		const url = requestUrl(req);
 
+		// API: Get tour result
+		if (url.pathname.match(/^\/api\/tour\/[^/]+$/) && req.method === "GET") {
+			const jobId = url.pathname.slice("/api/tour/".length);
+			const result = tour.getTour(jobId);
+			if (!result) {
+				json(res, { error: "Tour not found" }, 404);
+				return;
+			}
+			json(res, result);
+			return;
+		}
+
+		// API: Save tour checklist state
+		if (url.pathname.match(/^\/api\/tour\/[^/]+\/checklist$/) && req.method === "PUT") {
+			const jobId = url.pathname.split("/")[3];
+			try {
+				const body = await parseBody(req) as { checked: boolean[] };
+				if (Array.isArray(body.checked)) tour.saveChecklist(jobId, body.checked);
+				json(res, { ok: true });
+			} catch {
+				json(res, { error: "Invalid JSON" }, 400);
+			}
+			return;
+		}
+
 		if (url.pathname === "/api/diff" && req.method === "GET") {
 			json(res, {
 				rawPatch: currentPatch,

diff --git a/apps/pi-extension/vendor.sh b/apps/pi-extension/vendor.sh
@@ -12,7 +12,7 @@ for f in feedback-templates review-core storage draft project pr-provider pr-git
 done
 
 # Vendor review agent modules from packages/server/ — rewrite imports for generated/ layout
-for f in codex-review claude-review path-utils; do
+for f in codex-review claude-review tour-review path-utils; do
   src="../../packages/server/$f.ts"
   printf '// @generated — DO NOT EDIT. Source: packages/server/%s.ts\n' "$f" | cat - "$src" \
     | sed 's|from "./vcs"|from "./review-core.js"|' \