diff --git a/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/.openspec.yaml b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/.openspec.yaml new file mode 100644 index 0000000..81cd71f --- /dev/null +++ b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-11 diff --git a/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/proposal.md b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/proposal.md new file mode 100644 index 0000000..77e427d --- /dev/null +++ b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/proposal.md @@ -0,0 +1,15 @@ +## Why + +- GitGuardex needs a local PR review runner that can reuse already-authenticated Codex or Claude CLI sessions without requiring OpenAI or Anthropic API keys. +- GitHub posting should work with GitHub Actions `GITHUB_TOKEN` or local `gh` auth, with an artifact fallback when GitHub auth is unavailable. + +## What Changes + +- Add `gx pr-review --provider codex|claude --pr [--post]`. +- Fetch the PR diff through `gh pr diff `, prompt the selected local CLI for structured findings, and post one GitHub review with inline comments through `gh api`. +- Write a markdown review artifact instead of posting when `--post` is omitted or GitHub auth is unavailable. + +## Impact + +- Scope is CLI-only and self-hosted-runner friendly. It does not introduce model API dependencies or hosted bot state. +- Inline comment accuracy depends on provider output using changed-file paths and changed-line numbers. diff --git a/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/specs/codex-task/spec.md b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/specs/codex-task/spec.md new file mode 100644 index 0000000..dbf41f3 --- /dev/null +++ b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/specs/codex-task/spec.md @@ -0,0 +1,15 @@ +## ADDED Requirements + +### Requirement: Local PR Review Runner +The system SHALL provide a `gx pr-review` command that reviews a GitHub pull request using an authenticated local agent CLI without requiring OpenAI or Anthropic API tokens. + +#### Scenario: Review with GitHub posting +- **WHEN** `gx pr-review --provider codex --pr --post` runs in a repository with GitHub auth +- **THEN** the command reads the pull request diff through `gh pr diff ` +- **AND** sends a compact structured-review prompt to the selected local provider +- **AND** posts one GitHub review containing inline comments for returned findings. + +#### Scenario: Review without GitHub auth +- **WHEN** `gx pr-review --provider claude --pr --post` runs without `GITHUB_TOKEN`, `GH_TOKEN`, or usable `gh auth` +- **THEN** the command does not require model API credentials +- **AND** writes a markdown review artifact containing the structured findings instead of posting. diff --git a/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/tasks.md b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/tasks.md new file mode 100644 index 0000000..1200400 --- /dev/null +++ b/openspec/changes/agent-codex-codex-task-2026-05-11-15-20/tasks.md @@ -0,0 +1,34 @@ +## Definition of Done + +This change is complete only when **all** of the following are true: + +- Every checkbox below is checked. +- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff. +- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline. + +## Handoff + +- Handoff: change=`agent-codex-codex-task-2026-05-11-15-20`; branch=`agent/codex/codex-task-2026-05-11-15-20`; scope=`gx pr-review local PR review runner`; action=`continue this sandbox or finish cleanup after a usage-limit/manual takeover`. +- Copy prompt: Continue `agent-codex-codex-task-2026-05-11-15-20` on branch `agent/codex/codex-task-2026-05-11-15-20`. Work inside the existing sandbox, review `openspec/changes/agent-codex-codex-task-2026-05-11-15-20/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/codex-task-2026-05-11-15-20 --base main --via-pr --wait-for-merge --cleanup`. + +## 1. Specification + +- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-codex-task-2026-05-11-15-20`. +- [x] 1.2 Define normative requirements in `specs/codex-task/spec.md`. + +## 2. Implementation + +- [x] 2.1 Implement scoped behavior changes. +- [x] 2.2 Add/update focused regression coverage. + +## 3. Verification + +- [x] 3.1 Run targeted project verification commands. +- [x] 3.2 Run `openspec validate agent-codex-codex-task-2026-05-11-15-20 --type change --strict`. +- [x] 3.3 Run `openspec validate --specs`. + +## 4. Cleanup (mandatory; run before claiming completion) + +- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation. +- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff. +- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch). diff --git a/src/cli/args.js b/src/cli/args.js index 0a531b5..f55c0fb 100644 --- a/src/cli/args.js +++ b/src/cli/args.js @@ -261,6 +261,69 @@ function parseReviewArgs(rawArgs) { }; } +function parsePrReviewArgs(rawArgs) { + const parsed = parseTargetFlag(rawArgs, process.cwd()); + const options = { + target: parsed.target, + provider: 'codex', + pr: '', + post: false, + artifact: '', + timeoutMs: 10 * 60 * 1000, + }; + + for (let index = 0; index < parsed.args.length; index += 1) { + const arg = parsed.args[index]; + if (arg === '--provider') { + const next = requireValue(parsed.args, index, '--provider'); + if (!['codex', 'claude'].includes(next)) { + throw new Error(`Invalid --provider value: ${next} (expected codex|claude)`); + } + options.provider = next; + index += 1; + continue; + } + if (arg === '--pr') { + options.pr = requireValue(parsed.args, index, '--pr'); + index += 1; + continue; + } + if (arg === '--post') { + options.post = true; + continue; + } + if (arg === '--no-post') { + options.post = false; + continue; + } + if (arg === '--artifact' || arg === '--output') { + options.artifact = requireValue(parsed.args, index, arg); + index += 1; + continue; + } + if (arg === '--timeout-ms') { + const raw = requireValue(parsed.args, index, '--timeout-ms'); + const parsedTimeout = Number.parseInt(raw, 10); + if (!Number.isFinite(parsedTimeout) || parsedTimeout <= 0) { + throw new Error('--timeout-ms requires a positive integer'); + } + options.timeoutMs = parsedTimeout; + index += 1; + continue; + } + throw new Error(`Unknown option: ${arg}`); + } + + if (!options.pr) { + throw new Error('--pr requires a pull request number'); + } + if (!/^\d+$/.test(String(options.pr))) { + throw new Error(`--pr must be a pull request number (received: ${options.pr})`); + } + + return options; +} + function parseAgentsArgs(rawArgs) { const parsed = parseTargetFlag(rawArgs, process.cwd()); const [subcommandRaw = '', ...rest] = parsed.args; @@ -1138,6 +1201,7 @@ module.exports = { parseDoctorArgs, parseTargetFlag, parseReviewArgs, + parsePrReviewArgs, parseAgentsArgs, parseReportArgs, parseSyncArgs, diff --git a/src/cli/main.js b/src/cli/main.js index bf46c40..2abe06c 100755 --- a/src/cli/main.js +++ b/src/cli/main.js @@ -13,6 +13,7 @@ const { finishAgentSession } = require('../agents/finish'); const sessionSeverityReport = require('../report/session-severity'); const cockpitModule = require('../cockpit'); const agentsStart = require('../agents/start'); +const prReviewModule = require('../pr-review'); const { fs, path, @@ -135,6 +136,7 @@ const { parseDoctorArgs, parseTargetFlag, parseReviewArgs, + parsePrReviewArgs, parseAgentsArgs, parseReportArgs, parseSyncArgs, @@ -2541,6 +2543,13 @@ function review(rawArgs) { process.exitCode = typeof result.status === 'number' ? result.status : 1; } +function prReview(rawArgs) { + const options = parsePrReviewArgs(rawArgs); + const result = prReviewModule.runPrReview(options); + prReviewModule.printPrReviewResult(result); + process.exitCode = 0; +} + function agentsStatePathForRepo(repoRoot) { return path.join(repoRoot, AGENTS_BOTS_STATE_RELATIVE); } @@ -3938,6 +3947,7 @@ async function main() { } if (command === 'prompt') return prompt(rest); + if (command === 'pr-review') return prReview(rest); if (command === 'doctor') return doctor(rest); if (command === 'branch') return branch(rest); if (command === 'pivot') return pivot(rest); diff --git a/src/context.js b/src/context.js index dde5b07..b9d0d38 100644 --- a/src/context.js +++ b/src/context.js @@ -458,6 +458,7 @@ const CLI_COMMAND_GROUPS = [ description: 'Review / cleanup bots, AI setup prompts, and safety reports.', commands: [ ['agents', 'Start/stop repo-scoped review + cleanup bots'], + ['pr-review', 'Run local Codex/Claude PR review and post inline GitHub comments or write an artifact'], ['cockpit', 'Create or attach to a repo tmux cockpit session'], ['install-agent-skills', 'Install Guardex Codex/Claude skills into the user home'], ['prompt', 'Print AI setup checklist or named slices (--exec, --part, --list-parts, --snippet)'], diff --git a/src/pr-review.js b/src/pr-review.js new file mode 100644 index 0000000..ba21b28 --- /dev/null +++ b/src/pr-review.js @@ -0,0 +1,241 @@ +const { + fs, + path, + os, + GH_BIN, +} = require('./context'); +const { run } = require('./core/runtime'); + +const TOOL_PREFIX = '[gitguardex]'; +const VALID_SEVERITIES = new Set(['low', 'medium', 'high', 'critical']); + +function normalizeProvider(raw) { + const provider = String(raw || 'codex').trim().toLowerCase(); + if (!['codex', 'claude'].includes(provider)) { + throw new Error(`Invalid provider: ${raw}`); + } + return provider; +} + +function commandForProvider(provider, prompt) { + if (provider === 'claude') { + return { cmd: 'claude', args: ['-p', prompt] }; + } + return { cmd: 'codex', args: ['exec', prompt] }; +} + +function compactReviewPrompt(diff) { + return [ + 'You are gitguardex-code-assist, a PR review runner.', + 'Review this GitHub PR diff for correctness bugs, regressions, security issues, and missing tests.', + 'Return JSON only. Shape:', + '{"findings":[{"path":"file","line":123,"severity":"low|medium|high|critical","message":"concise finding","suggestion":"optional replacement or fix"}]}', + 'Rules: path and line must point to changed lines in the diff. Use an empty findings array when nothing is worth commenting.', + '', + 'PR diff:', + diff, + ].join('\n'); +} + +function extractJsonPayload(text) { + const raw = String(text || '').trim(); + if (!raw) return { findings: [] }; + const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i); + const candidate = fenced ? fenced[1].trim() : raw; + try { + return JSON.parse(candidate); + } catch (_error) { + const objectStart = candidate.indexOf('{'); + const objectEnd = candidate.lastIndexOf('}'); + if (objectStart >= 0 && objectEnd > objectStart) { + return JSON.parse(candidate.slice(objectStart, objectEnd + 1)); + } + const arrayStart = candidate.indexOf('['); + const arrayEnd = candidate.lastIndexOf(']'); + if (arrayStart >= 0 && arrayEnd > arrayStart) { + return { findings: JSON.parse(candidate.slice(arrayStart, arrayEnd + 1)) }; + } + throw new Error('Review provider did not return parseable JSON findings'); + } +} + +function normalizeFinding(rawFinding) { + const pathValue = String(rawFinding?.path || '').trim(); + const line = Number.parseInt(String(rawFinding?.line || ''), 10); + const severity = String(rawFinding?.severity || 'medium').trim().toLowerCase(); + const message = String(rawFinding?.message || '').trim(); + const suggestion = String(rawFinding?.suggestion || '').trim(); + if (!pathValue || !Number.isInteger(line) || line <= 0 || !message) { + return null; + } + return { + path: pathValue, + line, + severity: VALID_SEVERITIES.has(severity) ? severity : 'medium', + message, + suggestion, + }; +} + +function normalizeFindings(providerOutput) { + const payload = extractJsonPayload(providerOutput); + const rawFindings = Array.isArray(payload) ? payload : payload.findings; + if (!Array.isArray(rawFindings)) { + throw new Error('Review provider JSON must contain a findings array'); + } + return rawFindings.map(normalizeFinding).filter(Boolean); +} + +function findingBody(finding) { + const lines = [`**${finding.severity.toUpperCase()}** ${finding.message}`]; + if (finding.suggestion) { + lines.push('', 'Suggested fix:', '```suggestion', finding.suggestion, '```'); + } + return lines.join('\n'); +} + +function renderMarkdownReview({ pr, provider, findings }) { + const lines = [ + `# GitGuardex PR Review`, + '', + `- PR: #${pr}`, + `- Provider: ${provider}`, + `- Findings: ${findings.length}`, + '', + ]; + if (findings.length === 0) { + lines.push('No findings.'); + } else { + for (const finding of findings) { + lines.push(`## ${finding.severity.toUpperCase()} ${finding.path}:${finding.line}`); + lines.push(''); + lines.push(finding.message); + if (finding.suggestion) { + lines.push('', '```suggestion', finding.suggestion, '```'); + } + lines.push(''); + } + } + return `${lines.join('\n').replace(/\n{3,}/g, '\n\n')}\n`; +} + +function defaultArtifactPath(repoRoot, pr) { + return path.join(repoRoot, '.gitguardex', 'pr-reviews', `pr-${pr}.md`); +} + +function writeArtifact(repoRoot, artifactPath, payload) { + const outputPath = artifactPath + ? path.resolve(repoRoot, artifactPath) + : defaultArtifactPath(repoRoot, payload.pr); + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + fs.writeFileSync(outputPath, renderMarkdownReview(payload), 'utf8'); + return outputPath; +} + +function githubAuthAvailable(env = process.env, runner = run) { + if (env.GITHUB_TOKEN || env.GH_TOKEN) return true; + const result = runner(GH_BIN, ['auth', 'status'], { timeout: 15_000 }); + return result.status === 0; +} + +function fetchPrDiff(pr, repoRoot, runner = run) { + const result = runner(GH_BIN, ['pr', 'diff', String(pr)], { cwd: repoRoot, timeout: 120_000 }); + if (result.status !== 0) { + throw new Error(`gh pr diff ${pr} failed${result.stderr ? `\n${result.stderr.trim()}` : ''}`); + } + return result.stdout || ''; +} + +function runProviderReview(provider, diff, repoRoot, timeoutMs, runner = run) { + const prompt = compactReviewPrompt(diff); + const command = commandForProvider(provider, prompt); + const result = runner(command.cmd, command.args, { cwd: repoRoot, timeout: timeoutMs }); + if (result.status !== 0) { + throw new Error(`${provider} review failed${result.stderr ? `\n${result.stderr.trim()}` : ''}`); + } + return normalizeFindings(result.stdout || ''); +} + +function postGithubReview(pr, findings, repoRoot, runner = run) { + const comments = findings.map((finding) => ({ + path: finding.path, + line: finding.line, + side: 'RIGHT', + body: findingBody(finding), + })); + const body = findings.length > 0 + ? `GitGuardex code-assist found ${findings.length} issue(s).` + : 'GitGuardex code-assist found no issues worth inline comments.'; + const payload = { + event: 'COMMENT', + body, + comments, + }; + const inputPath = path.join(os.tmpdir(), `gitguardex-pr-review-${process.pid}-${Date.now()}.json`); + fs.writeFileSync(inputPath, JSON.stringify(payload), 'utf8'); + try { + const result = runner(GH_BIN, [ + 'api', + `repos/:owner/:repo/pulls/${pr}/reviews`, + '--method', + 'POST', + '--input', + inputPath, + ], { cwd: repoRoot, timeout: 120_000 }); + if (result.status !== 0) { + throw new Error(`gh api review post failed${result.stderr ? `\n${result.stderr.trim()}` : ''}`); + } + return result; + } finally { + try { + fs.unlinkSync(inputPath); + } catch (_error) { + // best effort cleanup for temp API payload + } + } +} + +function runPrReview(options, deps = {}) { + const runner = deps.run || run; + const repoRoot = path.resolve(options.target || process.cwd()); + const provider = normalizeProvider(options.provider); + const pr = String(options.pr); + const diff = fetchPrDiff(pr, repoRoot, runner); + const findings = runProviderReview(provider, diff, repoRoot, options.timeoutMs, runner); + const payload = { pr, provider, findings }; + + if (!options.post) { + const artifactPath = writeArtifact(repoRoot, options.artifact, payload); + return { posted: false, artifactPath, findings }; + } + + if (!githubAuthAvailable(process.env, runner)) { + const artifactPath = writeArtifact(repoRoot, options.artifact, payload); + return { posted: false, artifactPath, findings, reason: 'github-auth-unavailable' }; + } + + postGithubReview(pr, findings, repoRoot, runner); + return { posted: true, artifactPath: '', findings }; +} + +function printPrReviewResult(result) { + if (result.posted) { + console.log(`${TOOL_PREFIX} Posted PR review with ${result.findings.length} finding(s).`); + return; + } + if (result.reason === 'github-auth-unavailable') { + console.log(`${TOOL_PREFIX} GitHub auth unavailable; wrote PR review artifact: ${result.artifactPath}`); + return; + } + console.log(`${TOOL_PREFIX} Wrote PR review artifact: ${result.artifactPath}`); +} + +module.exports = { + compactReviewPrompt, + commandForProvider, + extractJsonPayload, + normalizeFindings, + renderMarkdownReview, + runPrReview, + printPrReviewResult, +}; diff --git a/test/pr-review.test.js b/test/pr-review.test.js new file mode 100644 index 0000000..d39d7eb --- /dev/null +++ b/test/pr-review.test.js @@ -0,0 +1,113 @@ +const { + test, + assert, + fs, + os, + path, + runNodeWithEnv, + createFakeBin, + initRepo, + seedCommit, + defineSpawnSuite, +} = require('./helpers/install-test-helpers'); +const prReview = require('../src/pr-review'); + +defineSpawnSuite('pr-review suite', () => { + +test('normalizeFindings parses fenced provider JSON and drops malformed findings', () => { + const findings = prReview.normalizeFindings('```json\n{"findings":[{"path":"src/a.js","line":7,"severity":"high","message":"bug"},{"path":"","line":0,"message":""}]}\n```'); + assert.deepEqual(findings, [ + { + path: 'src/a.js', + line: 7, + severity: 'high', + message: 'bug', + suggestion: '', + }, + ]); +}); + + +test('gx pr-review posts one GitHub review when auth is available', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + const markerDir = fs.mkdtempSync(path.join(os.tmpdir(), 'guardex-pr-review-')); + const ghMarker = path.join(markerDir, 'gh-args.log'); + const apiPayload = path.join(markerDir, 'api-payload.json'); + const fakeGh = createFakeBin('gh', ` +if [[ "$1" == "pr" && "$2" == "diff" ]]; then + printf '%s\\n' 'diff --git a/src/a.js b/src/a.js' + printf '%s\\n' '@@ -1 +1 @@' + printf '%s\\n' '+const broken = true' + exit 0 +fi +if [[ "$1" == "auth" && "$2" == "status" ]]; then + exit 0 +fi +if [[ "$1" == "api" ]]; then + printf '%s\\n' "$*" > "${ghMarker}" + while [[ "$#" -gt 0 ]]; do + if [[ "$1" == "--input" ]]; then + cp "$2" "${apiPayload}" + exit 0 + fi + shift + done +fi +echo "unexpected gh args: $*" >&2 +exit 1 +`); + const fakeCodex = createFakeBin('codex', ` +printf '%s\\n' '{"findings":[{"path":"src/a.js","line":1,"severity":"medium","message":"Use a real assertion","suggestion":"const broken = false"}]}' +`); + + const result = runNodeWithEnv(['pr-review', '--provider', 'codex', '--pr', '12', '--post', '--target', repoDir], repoDir, { + PATH: `${fakeGh.fakeBin}:${fakeCodex.fakeBin}:${process.env.PATH}`, + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, /Posted PR review with 1 finding/); + assert.match(fs.readFileSync(ghMarker, 'utf8'), /repos\/:owner\/:repo\/pulls\/12\/reviews/); + const payload = JSON.parse(fs.readFileSync(apiPayload, 'utf8')); + assert.equal(payload.event, 'COMMENT'); + assert.equal(payload.comments.length, 1); + assert.equal(payload.comments[0].path, 'src/a.js'); + assert.equal(payload.comments[0].line, 1); + assert.match(payload.comments[0].body, /MEDIUM/); +}); + + +test('gx pr-review writes markdown artifact when GitHub auth is unavailable', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + const fakeGh = createFakeBin('gh', ` +if [[ "$1" == "pr" && "$2" == "diff" ]]; then + printf '%s\\n' 'diff --git a/src/a.js b/src/a.js' + printf '%s\\n' '@@ -1 +1 @@' + printf '%s\\n' '+const broken = true' + exit 0 +fi +if [[ "$1" == "auth" && "$2" == "status" ]]; then + exit 1 +fi +echo "unexpected gh args: $*" >&2 +exit 1 +`); + const fakeClaude = createFakeBin('claude', ` +printf '%s\\n' '{"findings":[]}' +`); + + const result = runNodeWithEnv(['pr-review', '--provider', 'claude', '--pr', '13', '--post', '--target', repoDir], repoDir, { + PATH: `${fakeGh.fakeBin}:${fakeClaude.fakeBin}:${process.env.PATH}`, + GITHUB_TOKEN: '', + GH_TOKEN: '', + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, /GitHub auth unavailable; wrote PR review artifact:/); + const artifactPath = path.join(repoDir, '.gitguardex', 'pr-reviews', 'pr-13.md'); + assert.equal(fs.existsSync(artifactPath), true); + assert.match(fs.readFileSync(artifactPath, 'utf8'), /No findings/); +}); + +});