From c82c6e580342f9106b87cd0533231b9daad2583a Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Sun, 3 May 2026 12:05:25 -0700 Subject: [PATCH] Add adversarial review workflow --- .github/prompts/adversarial-review.md | 80 +++++++ .../build-baseline-binary.sh | 8 + .../checkout-workflow-dispatch-pr.sh | 7 + .../collect-review-context.sh | 26 +++ .../collect-test-inventory.sh | 36 +++ .../compose-retry-review-prompt.sh | 17 ++ .../compose-review-prompt.sh | 30 +++ .../enforce-recommendation.sh | 36 +++ .../install-os-dependencies.sh | 5 + .../persist-agent-response.sh | 26 +++ .../post-sticky-pr-comment.sh | 26 +++ .../render-review-summary.sh | 11 + .../adversarial-review/run-baseline-tests.sh | 8 + .../validate-initial-agent-response.sh | 26 +++ .../render-adversarial-review-summary.py | 198 +++++++++++++++++ .github/workflows/adversarial-review.yml | 209 ++++++++++++++++++ 16 files changed, 749 insertions(+) create mode 100644 .github/prompts/adversarial-review.md create mode 100755 .github/scripts/adversarial-review/build-baseline-binary.sh create mode 100755 .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh create mode 100755 .github/scripts/adversarial-review/collect-review-context.sh create mode 100755 .github/scripts/adversarial-review/collect-test-inventory.sh create mode 100755 .github/scripts/adversarial-review/compose-retry-review-prompt.sh create mode 100755 .github/scripts/adversarial-review/compose-review-prompt.sh create mode 100755 .github/scripts/adversarial-review/enforce-recommendation.sh create mode 100755 .github/scripts/adversarial-review/install-os-dependencies.sh create mode 100755 .github/scripts/adversarial-review/persist-agent-response.sh create mode 100755 .github/scripts/adversarial-review/post-sticky-pr-comment.sh create mode 100755 .github/scripts/adversarial-review/render-review-summary.sh create mode 100755 .github/scripts/adversarial-review/run-baseline-tests.sh create mode 100755 
.github/scripts/adversarial-review/validate-initial-agent-response.sh create mode 100644 .github/scripts/render-adversarial-review-summary.py create mode 100644 .github/workflows/adversarial-review.yml diff --git a/.github/prompts/adversarial-review.md b/.github/prompts/adversarial-review.md new file mode 100644 index 0000000..a4948b3 --- /dev/null +++ b/.github/prompts/adversarial-review.md @@ -0,0 +1,80 @@ +# Bash AST adversarial review + +You are an adversarial reviewer for `bash-ast`, a Rust CLI/library that uses GNU Bash's real parser through FFI to parse shell scripts into JSON AST and convert JSON AST back to bash. + +Your job is to add value beyond ordinary CI. Do not simply rerun the full test suite as your main contribution; the workflow has already captured baseline build/test logs for you. Instead, inspect the repository and the supplied context, identify parser behaviors worth challenging, and run a small number of targeted probes. + +## What to inspect first + +- `README.md`, `Cargo.toml`, `src/`, and relevant tests under `tests/`. +- `review-artifacts/pr-context.json` if present. +- `review-artifacts/base-diff.stat` and `review-artifacts/base-diff.patch` if present. +- `review-artifacts/build.log`, `review-artifacts/baseline-tests.log`, and status files if present. +- `review-artifacts/baseline-test-inventory.md` and `review-artifacts/baseline-test-list.txt` for the automated tests that were already enumerated after the baseline test run. + +Before planning probes, inspect the automated test inventory so you do not duplicate existing coverage or claim a gap that is already covered by a listed test. If this run is associated with a PR, extract 2-4 concrete, testable claims from the PR title/body/diff before running probes. If there is no PR context, pick high-risk parser/round-trip behaviors from the current checkout. 
+ +## Probe guidance + +Prefer edge cases involving one or more of: + +- nested quotes and escaped newlines; +- command substitution and arithmetic expansion; +- heredocs and here-strings; +- process substitution; +- pipelines, negated pipelines, and lists; +- arrays and parameter expansion; +- case/select/for/while/function syntax; +- malformed syntax and graceful error handling; +- parse-to-JSON then `--to-bash` round trips. + +For each probe: + +1. Create temporary scripts/data only under `/tmp` or `review-artifacts/agent-probes/`. +2. Use the repository's actual binary/library/test harness whenever practical. The built CLI is usually `target/debug/bash-ast` after `cargo build`. +3. Capture concise evidence. If output is long, write full logs to `review-artifacts/agent-probes/` and summarize the relevant lines. +4. Decide whether the observed behavior supports or refutes the hypothesis. + +## Constraints + +- Do not modify repository source, tests, manifests, lockfiles, generated snapshots, or submodules. +- Do not install arbitrary dependencies. +- Do not run broad/unbounded commands that dump huge files or recursive listings. +- Do not use network access except GitHub context already provided by the workflow. +- Keep shell commands and outputs in the final response compact. +- In each `unitTestRecommendation`, distinguish between existing automated coverage you saw in the test inventory and any new coverage you believe should be added. +- If setup/build failures prevent runtime probes, perform source-level inspection and report `INVESTIGATE` with the best concrete blocker evidence. 
+ +## Required final response format + +Return a concise human-readable review followed by a machine-readable JSON block between exact markers, each written alone on its own line as plain text (no backticks or other formatting): + +`JSON_RESULT_START` + +```json +{ + "recommendation": "PASS|FAIL|INVESTIGATE", + "why": "One or two sentences explaining the recommendation and highest risk.", + "tests": [ + { + "title": "Short name", + "hypothesis": "What behavior was being tested", + "impact": "Why this matters if wrong", + "command": "Short command summary, not a giant script", + "output": "Concise observed output or pointer to artifact path", + "result": "PASS|FAIL", + "unitTestRecommendation": "What automated coverage should be added or why existing coverage is enough" + } + ], + "finalMessage": "Brief operator-facing summary" +} +``` + +`JSON_RESULT_END` + +Rules for the JSON block: + +- `recommendation` must be exactly `PASS`, `FAIL`, or `INVESTIGATE`; `why` and `finalMessage` must be non-empty strings. +- `tests` must contain at least one substantive probe or one clearly labeled blocker probe. +- Every test object must have non-empty string fields: `title`, `hypothesis`, `impact`, `command`, `output`, `result`, and `unitTestRecommendation`. +- Per-test `result` must be exactly `PASS` or `FAIL`. 
diff --git a/.github/scripts/adversarial-review/build-baseline-binary.sh b/.github/scripts/adversarial-review/build-baseline-binary.sh
new file mode 100755
index 0000000..082369e
--- /dev/null
+++ b/.github/scripts/adversarial-review/build-baseline-binary.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set +e
+# Build the crate, copy the output into build.log, and record cargo's exit code.
+mkdir -p review-artifacts
+cargo build --verbose 2>&1 | tee review-artifacts/build.log
+status=${PIPESTATUS[0]}  # exit code of cargo (first pipeline stage), not of tee
+echo "$status" > review-artifacts/build-status.txt
+exit "$status"
diff --git a/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh b/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
new file mode 100755
index 0000000..b276dba
--- /dev/null
+++ b/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Check out the PR named by $REQUESTED_PR and export its head SHA and base branch through $GITHUB_ENV.
+gh pr checkout "$REQUESTED_PR"
+git submodule update --init --recursive
+echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
+echo "BASE_REF=$(gh pr view "$REQUESTED_PR" --json baseRefName --jq .baseRefName)" >> "$GITHUB_ENV"
diff --git a/.github/scripts/adversarial-review/collect-review-context.sh b/.github/scripts/adversarial-review/collect-review-context.sh
new file mode 100755
index 0000000..fad3e3b
--- /dev/null
+++ b/.github/scripts/adversarial-review/collect-review-context.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Gather git, cargo, and pull-request context into review-artifacts/ for the review agent.
+mkdir -p review-artifacts/agent-probes
+git status --short > review-artifacts/git-status.txt
+git log --oneline -n 20 > review-artifacts/recent-commits.txt
+cargo metadata --no-deps --format-version 1 > review-artifacts/cargo-metadata.json || true
+
+git fetch origin "$BASE_REF" || true  # no --depth: a shallow fetch cuts the base tip's ancestry, leaving the three-dot diffs below without a merge base
+if git rev-parse --verify "origin/$BASE_REF" >/dev/null 2>&1; then
+  git diff --stat "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.stat || true
+  git diff --find-renames "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.patch || true
+else
+  : > review-artifacts/base-diff.stat  # base ref unavailable: leave empty placeholder files
+  : > review-artifacts/base-diff.patch
+fi
+
+if [ -n "${PR_NUMBER:-}" ]; then  # default to empty so an unset PR_NUMBER does not abort under `set -u`
+  gh pr view "$PR_NUMBER" \
+    --json number,title,author,body,baseRefName,headRefName,headRefOid,url,files,comments \
+    > review-artifacts/pr-context.json || echo '{}' > review-artifacts/pr-context.json
+  gh pr diff "$PR_NUMBER" > review-artifacts/pr.diff || true
+else
+  echo '{}' > review-artifacts/pr-context.json  # no PR: write an empty JSON object and an empty diff
+  : > review-artifacts/pr.diff
+fi
diff --git a/.github/scripts/adversarial-review/collect-test-inventory.sh b/.github/scripts/adversarial-review/collect-test-inventory.sh
new file mode 100755
index 0000000..d6d99a4
--- /dev/null
+++ b/.github/scripts/adversarial-review/collect-test-inventory.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+set +e
+# List the crate's tests with `cargo test -- --list` and render a Markdown inventory of their names.
+mkdir -p review-artifacts
+cargo test -- --list 2>&1 | tee review-artifacts/baseline-test-list.txt
+status=${PIPESTATUS[0]}  # exit code of cargo, not of tee
+echo "$status" > review-artifacts/baseline-test-list-status.txt
+python3 - <<'PY'
+from pathlib import Path
+
+text = Path('review-artifacts/baseline-test-list.txt').read_text(encoding='utf-8', errors='replace')
+tests = sorted({line.strip()[:-len(': test')] for line in text.splitlines() if line.strip().endswith(': test')})  # unique names from "<name>: test" lines
+benches = sorted({line.strip()[:-len(': benchmark')] for line in text.splitlines() if line.strip().endswith(': benchmark')})  # unique names from "<name>: benchmark" lines
+status = Path('review-artifacts/baseline-test-list-status.txt').read_text(encoding='utf-8').strip()
+lines = [
+    '# Automated tests already enumerated',
+    '',
+    f'- Test inventory exit code: `{status}`',
+    f'- Enumerated test count: `{len(tests)}`',
+    f'- Enumerated benchmark count: `{len(benches)}`',
+    '',
+    'The baseline workflow already ran `cargo test --verbose -- --test-threads=1` before this inventory was collected.',
+    'Use this inventory to avoid duplicating existing automated coverage in adversarial probes and recommendations.',
+    '',
+    '## Test names',
+]
+max_names = 250  # cap on test names written to the inventory; the overflow count is reported below
+lines.extend(f'- `{name}`' for name in tests[:max_names])
+if len(tests) > max_names:
+    lines.append(f'- ... truncated {len(tests) - max_names} additional tests; see `baseline-test-list.txt` for the full list.')
+if benches:
+    lines.extend(['', '## Benchmark names'])
+    lines.extend(f'- `{name}`' for name in benches[:50])
+Path('review-artifacts/baseline-test-inventory.md').write_text('\n'.join(lines) + '\n', encoding='utf-8')
+PY
+exit 0
diff --git a/.github/scripts/adversarial-review/compose-retry-review-prompt.sh b/.github/scripts/adversarial-review/compose-retry-review-prompt.sh
new file mode 100755
index 0000000..2c1bffb
--- /dev/null
+++ b/.github/scripts/adversarial-review/compose-retry-review-prompt.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Append a retry instruction to the original review prompt and publish it as the `prompt` step output.
+delimiter="RETRY_REVIEW_PROMPT_$(date +%s)_$$"
+{
+  cat review-artifacts/review-prompt.md
+  echo
+  echo "## Retry instruction"
+  echo
+  echo "The previous adversarial-review attempt did not produce a valid structured result. Reason: ${RETRY_REASON:-unknown}."
+  echo "Retry the review now. You must end with exact line markers JSON_RESULT_START and JSON_RESULT_END, with one valid JSON object between them and no nested marker text inside JSON strings. The JSON must satisfy the required schema, including a non-empty tests array."
+} > review-artifacts/review-prompt-retry.md
+{
+  echo "prompt<<$delimiter"  # multiline step output: name<<DELIMITER, the value, then DELIMITER
+  cat review-artifacts/review-prompt-retry.md
+  echo "$delimiter"
+} >> "$GITHUB_OUTPUT"
diff --git a/.github/scripts/adversarial-review/compose-review-prompt.sh b/.github/scripts/adversarial-review/compose-review-prompt.sh
new file mode 100755
index 0000000..4902a57
--- /dev/null
+++ b/.github/scripts/adversarial-review/compose-review-prompt.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Build review-prompt.md from the static prompt plus run context and publish it as the `prompt` step output.
+delimiter="REVIEW_PROMPT_$(date +%s)_$$"  # epoch seconds plus shell PID
+{
+  cat .github/prompts/adversarial-review.md
+  echo
+  echo "## Workflow-provided context"
+  echo
+  echo "- Repository: $GITHUB_REPOSITORY"
+  echo "- Event: $GITHUB_EVENT_NAME"
+  echo "- PR number: ${PR_NUMBER:-none}"
+  echo "- Base ref: ${BASE_REF:-unknown}"
+  echo "- Head SHA: ${HEAD_SHA:-unknown}"
+  echo "- Build exit code: $(cat review-artifacts/build-status.txt 2>/dev/null || echo unknown)"
+  echo "- Baseline test exit code: $(cat review-artifacts/baseline-test-status.txt 2>/dev/null || echo unknown)"
+  echo "- Baseline test inventory exit code: $(cat review-artifacts/baseline-test-list-status.txt 2>/dev/null || echo unknown)"
+  echo
+  echo "Artifacts are available under ./review-artifacts/. Keep any additional probe artifacts under ./review-artifacts/agent-probes/."
+  if [ -f review-artifacts/baseline-test-inventory.md ]; then  # inline the inventory only when it exists
+    echo
+    echo "## Automated tests already run/enumerated"
+    cat review-artifacts/baseline-test-inventory.md
+  fi
+} > review-artifacts/review-prompt.md
+{
+  echo "prompt<<$delimiter"  # multiline step output: name<<DELIMITER, the value, then DELIMITER
+  cat review-artifacts/review-prompt.md
+  echo "$delimiter"
+} >> "$GITHUB_OUTPUT"
diff --git a/.github/scripts/adversarial-review/enforce-recommendation.sh b/.github/scripts/adversarial-review/enforce-recommendation.sh
new file mode 100755
index 0000000..a4698d6
--- /dev/null
+++ b/.github/scripts/adversarial-review/enforce-recommendation.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Exit non-zero unless the persisted agent response holds a valid structured review recommending PASS.
+python3 - <<'PY'
+import importlib.util
+import sys
+from pathlib import Path
+
+module_path = Path('.github/scripts/render-adversarial-review-summary.py')  # loaded by path: the hyphenated file name cannot be imported normally
+spec = importlib.util.spec_from_file_location('review_summary', module_path)
+if spec is None or spec.loader is None:
+    print(f'::error::Could not load review summary parser from {module_path}')
+    sys.exit(1)
+
+review_summary = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(review_summary)
+
+response_path = Path('review-artifacts/agent-response.md')
+response = response_path.read_text(encoding='utf-8', errors='replace') if response_path.exists() else ''  # a missing file is treated as an empty response
+review, warning = review_summary.extract_json_blob(response)
+if review is None:
+    print(f'::error::Adversarial review did not produce a valid structured recommendation: {warning}')
+    sys.exit(1)
+
+validation_errors = review_summary.validate_review(review)
+if validation_errors:
+    for error in validation_errors:  # one ::error:: annotation per schema violation
+        print(f'::error::Invalid adversarial review result: {error}')
+    sys.exit(1)
+
+recommendation = str(review.get('recommendation', '')).strip()
+print(f'Adversarial review recommendation: {recommendation}')
+if recommendation != 'PASS':  # FAIL and INVESTIGATE both fail the check
+    print(f'::error::Adversarial review recommendation is {recommendation}; failing the check so the PR is not mergeable as-is.')
+    sys.exit(1)
+PY
diff --git
a/.github/scripts/adversarial-review/install-os-dependencies.sh b/.github/scripts/adversarial-review/install-os-dependencies.sh
new file mode 100755
index 0000000..deeaf9e
--- /dev/null
+++ b/.github/scripts/adversarial-review/install-os-dependencies.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Install the ncurses development package with apt.
+sudo apt-get update
+sudo apt-get install -y libncurses-dev
diff --git a/.github/scripts/adversarial-review/persist-agent-response.sh b/.github/scripts/adversarial-review/persist-agent-response.sh
new file mode 100755
index 0000000..74dd1ba
--- /dev/null
+++ b/.github/scripts/adversarial-review/persist-agent-response.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Write the selected agent attempt (initial, or retry when the initial was invalid) plus metadata to review-artifacts/.
+mkdir -p review-artifacts
+python3 - <<'PY'
+import json
+import os
+from pathlib import Path
+
+use_retry = os.environ.get('INITIAL_VALID') != 'true'  # anything other than the literal 'true' selects the retry attempt
+response = os.environ.get('RETRY_RESPONSE' if use_retry else 'INITIAL_RESPONSE', '')
+success = os.environ.get('RETRY_SUCCESS' if use_retry else 'INITIAL_SUCCESS', '')
+share_url = os.environ.get('RETRY_SHARE_URL' if use_retry else 'INITIAL_SHARE_URL', '')
+Path('review-artifacts/agent-response.md').write_text(response, encoding='utf-8')
+if use_retry:
+    Path('review-artifacts/agent-response-retry.md').write_text(os.environ.get('RETRY_RESPONSE', ''), encoding='utf-8')
+Path('review-artifacts/agent-action-metadata.json').write_text(
+    json.dumps({
+        'selected_attempt': 'retry' if use_retry else 'initial',
+        'initial_valid': os.environ.get('INITIAL_VALID', ''),
+        'success': success,
+        'share_url': share_url,
+    }, indent=2) + '\n',
+    encoding='utf-8',
+)
+PY
diff --git a/.github/scripts/adversarial-review/post-sticky-pr-comment.sh b/.github/scripts/adversarial-review/post-sticky-pr-comment.sh
new file mode 100755
index 0000000..536909b
--- /dev/null
+++ b/.github/scripts/adversarial-review/post-sticky-pr-comment.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Create or update the single marker-tagged ("sticky") review comment on PR $PR_NUMBER.
+post_comment_input=$(jq -r '.inputs.post_comment // "false"' "$GITHUB_EVENT_PATH")
+if [ "${ADVERSARIAL_REVIEW_POST_COMMENTS:-}" != "true" ] && [ "$post_comment_input" != "true" ]; then
+  echo "Sticky PR comment disabled; set ADVERSARIAL_REVIEW_POST_COMMENTS=true or workflow_dispatch post_comment=true to enable."
+  exit 0
+fi
+
+marker='<!-- adversarial-review-sticky-comment -->'  # hidden tag identifying our comment; must be non-empty because jq contains("") matches every comment
+body_file=$(mktemp)
+{
+  echo "$marker"
+  echo ""
+  echo
+  cat review-artifacts/adversarial-review-summary.md
+} > "$body_file"
+
+comment_id=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --paginate \
+  --jq ".[] | select(.body | contains(\"$marker\")) | .id" | tail -n 1)
+
+if [ -n "$comment_id" ]; then  # a tagged comment exists: update it in place
+  gh api -X PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" -F "body=@$body_file" >/dev/null
+else
+  gh pr comment "$PR_NUMBER" --body-file "$body_file"
+fi
diff --git a/.github/scripts/adversarial-review/render-review-summary.sh b/.github/scripts/adversarial-review/render-review-summary.sh
new file mode 100755
index 0000000..4151a10
--- /dev/null
+++ b/.github/scripts/adversarial-review/render-review-summary.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Render the Markdown review summary and append it to the job's step summary.
+python3 .github/scripts/render-adversarial-review-summary.py \
+  --response review-artifacts/agent-response.md \
+  --build-log review-artifacts/build.log \
+  --baseline-log review-artifacts/baseline-tests.log \
+  --build-status review-artifacts/build-status.txt \
+  --baseline-status review-artifacts/baseline-test-status.txt \
+  --output review-artifacts/adversarial-review-summary.md
+cat review-artifacts/adversarial-review-summary.md >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/scripts/adversarial-review/run-baseline-tests.sh b/.github/scripts/adversarial-review/run-baseline-tests.sh
new file mode 100755
index 0000000..d5b6eef
--- /dev/null
+++ b/.github/scripts/adversarial-review/run-baseline-tests.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set +e
+# Run the test suite single-threaded, copy the output into baseline-tests.log, and record cargo's exit code.
+mkdir -p review-artifacts
+cargo test --verbose -- --test-threads=1 2>&1 | tee review-artifacts/baseline-tests.log
+status=${PIPESTATUS[0]}
+echo "$status" > review-artifacts/baseline-test-status.txt
+exit "$status"
diff --git a/.github/scripts/adversarial-review/validate-initial-agent-response.sh b/.github/scripts/adversarial-review/validate-initial-agent-response.sh
new file mode 100755
index 0000000..d9561da
--- /dev/null
+++ b/.github/scripts/adversarial-review/validate-initial-agent-response.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Save the initial agent response and emit `valid` / `reason` step outputs (the Python stdout is appended to $GITHUB_OUTPUT).
+mkdir -p review-artifacts
+python3 - <<'PY' >> "$GITHUB_OUTPUT"
+import importlib.util
+import os
+from pathlib import Path
+
+Path('review-artifacts/agent-response-initial.md').write_text(os.environ.get('AGENT_RESPONSE', ''), encoding='utf-8')
+spec = importlib.util.spec_from_file_location('review_summary', '.github/scripts/render-adversarial-review-summary.py')  # loaded by path: the hyphenated file name cannot be imported normally
+review_summary = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(review_summary)
+review, warning = review_summary.extract_json_blob(os.environ.get('AGENT_RESPONSE', ''))
+errors = []
+if review is None:
+    errors.append(warning or 'missing structured review result')
+else:
+    errors.extend(review_summary.validate_review(review))
+if errors:
+    print('valid=false')
+    print(f"reason={' ; '.join(errors)}")
+else:
+    print('valid=true')
+    print('reason=valid structured review result')
+PY
diff --git a/.github/scripts/render-adversarial-review-summary.py b/.github/scripts/render-adversarial-review-summary.py
new file mode 100644
index 0000000..2dfb254
--- /dev/null
+++ b/.github/scripts/render-adversarial-review-summary.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""Render a GitHub Actions summary from agent adversarial review output."""
+
+from __future__ import annotations
+
+import argparse
+import html
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+
+def read_text(path: Path | None) -> str:  # returns "" when the path is None or does not exist
+    if path is None or not path.exists():
+        return ""
+    return path.read_text(encoding="utf-8", errors="replace")
+
+
+def extract_json_blob(response: str) -> tuple[dict[str, Any] | None, str | None]:  # (review, None) on success, (None, reason) on failure
+    marker_blocks = re.findall(  # every marker-delimited block; the optional backticks accept markers echoed as inline code
+        r"^\s*`?JSON_RESULT_START`?\s*$\s*(.*?)^\s*`?JSON_RESULT_END`?\s*$",
+        response,
+        flags=re.IGNORECASE | re.DOTALL | re.MULTILINE,
+    )
+    if not marker_blocks:
+        return None, "Could not find JSON_RESULT_START / JSON_RESULT_END markers."
+
+    candidate = marker_blocks[-1].strip()  # the result is required to come last, so the final block wins over any quoted template
+    fence_match = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", candidate, flags=re.IGNORECASE | re.DOTALL)  # unwrap an optional ```json fence
+    if fence_match:
+        candidate = fence_match.group(1).strip()
+
+    try:
+        parsed = json.loads(candidate)
+    except json.JSONDecodeError as error:
+        return None, f"Could not parse JSON review block between JSON_RESULT_START and JSON_RESULT_END: {error}"
+
+    if not isinstance(parsed, dict):
+        return None, "Structured review JSON must be an object."
+    return parsed, None
+
+
+def validate_review(review: dict[str, Any]) -> list[str]:  # returns schema violations; an empty list means the review is valid
+    errors: list[str] = []
+    recommendation = review.get("recommendation")
+    if recommendation not in ("PASS", "FAIL", "INVESTIGATE"):  # tuple, not set: an unhashable JSON list/dict is reported instead of raising TypeError
+        errors.append("recommendation must be PASS, FAIL, or INVESTIGATE")
+
+    for field in ("why", "finalMessage"):
+        if not isinstance(review.get(field), str) or not review[field].strip():
+            errors.append(f"{field} must be a non-empty string")
+
+    tests = review.get("tests")
+    if not isinstance(tests, list) or not tests:
+        errors.append("tests must be a non-empty array")
+        return errors  # nothing to inspect per test
+
+    required_test_fields = ("title", "hypothesis", "impact", "command", "output", "result", "unitTestRecommendation")
+    for index, test in enumerate(tests, start=1):  # error messages use 1-based test positions
+        if not isinstance(test, dict):
+            errors.append(f"tests[{index}] must be an object")
+            continue
+        for field in required_test_fields:
+            if not isinstance(test.get(field), str) or not test[field].strip():
+                errors.append(f"tests[{index}].{field} must be a non-empty string")
+        if test.get("result") not in ("PASS", "FAIL"):  # tuple for the same reason as above
+            errors.append(f"tests[{index}].result must be PASS or FAIL")
+
+    return errors
+
+
+def
normalize_tests(value: Any) -> list[dict[str, Any]]:  # keeps only the dict entries of a list; anything else yields []
+    if not isinstance(value, list):
+        return []
+    return [item for item in value if isinstance(item, dict)]
+
+
+def markdown_text(value: Any) -> str:  # HTML-escapes &, < and > for text placed outside code fences
+    return html.escape(str(value), quote=False)
+
+
+def fenced_text(value: Any) -> str:  # text for use inside a code fence: no HTML escaping, because entities render literally there
+    return str(value).replace("```", "`\\`\\`")  # break up backtick runs so the content cannot close the surrounding fence
+
+
+def append_log_tail(lines: list[str], title: str, text: str, max_lines: int = 60) -> None:  # appends a fenced tail of `text` to `lines` in place
+    if not text.strip():
+        return
+    tail = "\n".join(text.rstrip().splitlines()[-max_lines:])
+    lines.extend([
+        f"### {title}",
+        "",
+        "```text",
+        tail,
+        "```",
+        "",
+    ])
+
+
+def render_summary(args: argparse.Namespace) -> str:  # builds the whole Markdown summary from the files named in `args`
+    response = read_text(args.response)
+    build_log = read_text(args.build_log)
+    baseline_log = read_text(args.baseline_log)
+    build_status = read_text(args.build_status).strip() if args.build_status and args.build_status.exists() else "unknown"
+    baseline_status = read_text(args.baseline_status).strip() if args.baseline_status and args.baseline_status.exists() else "unknown"
+    review, warning = extract_json_blob(response)
+
+    lines: list[str] = [
+        "## Adversarial review",
+        "",
+        f"- **Build exit code:** `{build_status or 'unknown'}`",
+        f"- **Baseline test exit code:** `{baseline_status or 'unknown'}`",
+    ]
+
+    if review is None:  # no parseable structured result: show the parser's warning instead
+        lines.extend([
+            "- **Recommendation:** `UNKNOWN`",
+            "",
+            f"> ⚠️ {warning}",
+            "",
+        ])
+    else:
+        recommendation = str(review.get("recommendation", "UNKNOWN"))
+        why = str(review.get("why", "No rationale supplied."))
+        final_message = str(review.get("finalMessage", ""))
+        tests = normalize_tests(review.get("tests"))
+        validation_errors = validate_review(review)
+
+        lines.extend([
+            f"- **Recommendation:** `{markdown_text(recommendation)}`",
+            f"- **Why:** {markdown_text(why)}",
+        ])
+        if final_message:
+            lines.append(f"- **Final message:** {markdown_text(final_message)}")
+        if validation_errors:
+            lines.extend(["", "### Structured review validation errors", ""])
+            lines.extend(f"- {error}" for error in validation_errors)
+        lines.extend(["", "### Structured probes", ""])
+
+        if not tests:
+            lines.extend(["No structured probes were parsed from the agent response.", ""])
+        else:
+            for index, test in enumerate(tests, start=1):
+                title = str(test.get("title", f"Probe {index}"))
+                result = str(test.get("result", "UNKNOWN"))
+                lines.extend([
+                    f"#### {index}. {markdown_text(title)} — `{markdown_text(result)}`",
+                    "",
+                    f"- **Hypothesis:** {markdown_text(test.get('hypothesis', ''))}",
+                    f"- **Impact:** {markdown_text(test.get('impact', ''))}",
+                    "- **Command:**",
+                    "",
+                    "```text",
+                    fenced_text(test.get('command', '')),
+                    "```",
+                    "",
+                    f"- **Output:** {markdown_text(test.get('output', ''))}",
+                    f"- **Coverage recommendation:** {markdown_text(test.get('unitTestRecommendation', ''))}",
+                    "",
+                ])
+
+    lines.extend([
+        "### Full agent response",
+        "",
+        "<details>",
+        "<summary>Expand raw response</summary>",
+        "",
+        "````text",
+        fenced_text(response[-12000:]) if response else "(no agent response captured)",  # only the last 12000 characters are shown
+        "````",
+        "",
+        "</details>",
+        "",
+    ])
+
+    append_log_tail(lines, "Build log tail", build_log)
+    append_log_tail(lines, "Baseline test log tail", baseline_log)
+
+    return "\n".join(lines)
+
+
+def main() -> None:  # CLI entry point: parse the arguments and write the rendered summary to --output
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--response", type=Path, required=True)
+    parser.add_argument("--build-log", type=Path)
+    parser.add_argument("--baseline-log", type=Path)
+    parser.add_argument("--build-status", type=Path)
+    parser.add_argument("--baseline-status", type=Path)
+    parser.add_argument("--output", type=Path, required=True)
+    args = parser.parse_args()
+
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    args.output.write_text(render_summary(args), encoding="utf-8")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/adversarial-review.yml b/.github/workflows/adversarial-review.yml
new file mode 100644
index 0000000..a4c647f
--- /dev/null
+++ b/.github/workflows/adversarial-review.yml
@@ -0,0 +1,209 @@
+name: Adversarial review
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: Optional pull request number to review
+        required: false
+        default: ''
+      ref:
+        description: Optional ref, branch, or SHA to review when pr_number is empty
+        required: false
+        default: ''
+      post_comment:
+        description: Post or update the sticky PR review comment
+        required: false
+        type: boolean
+        default: false
+  pull_request:
+    branches: [main]
+    types: [opened, synchronize, reopened, ready_for_review]
+
+permissions:
+  contents: read
+  pull-requests: read
+  issues: write  # the sticky-comment script reads and patches comments through the issues comments API
+  actions: read
+
+concurrency:
+  group: adversarial-review-${{ github.workflow }}-${{ github.event.pull_request.number || github.event.inputs.pr_number || github.event.inputs.ref || github.sha }}
+  cancel-in-progress: false
+
+env:
+  CARGO_TERM_COLOR: always
+  PR_NUMBER: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}
+  BASE_REF: ${{ github.event.pull_request.base.ref || 'main' }}
+  HEAD_SHA: ${{
github.event.pull_request.head.sha || github.sha }}
+
+jobs:
+  adversarial-review:
+    name: Adversarial review
+    # pull_request runs only for same-repository branches so repository/model secrets are not exposed to forked PR code.
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout target
+        uses: actions/checkout@v6.0.2
+        with:
+          submodules: recursive
+          fetch-depth: 0
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.event.inputs.ref || github.sha }}
+
+      - name: Checkout workflow_dispatch PR
+        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.pr_number != '' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REQUESTED_PR: ${{ github.event.inputs.pr_number }}
+        run: bash .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh  # also writes HEAD_SHA and BASE_REF to $GITHUB_ENV
+
+      - name: Install OS dependencies
+        run: bash .github/scripts/adversarial-review/install-os-dependencies.sh
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@v1
+        with:
+          toolchain: stable
+
+      - name: Setup Node.js for agent action
+        uses: actions/setup-node@v6
+        with:
+          node-version: '25'
+
+      - name: Build baseline binary
+        id: build
+        continue-on-error: true  # the script exits with cargo's status; later steps still run and read build-status.txt
+        run: bash .github/scripts/adversarial-review/build-baseline-binary.sh
+
+      - name: Run baseline tests
+        id: baseline_tests
+        continue-on-error: true  # same pattern: the exit code is recorded in baseline-test-status.txt
+        run: bash .github/scripts/adversarial-review/run-baseline-tests.sh
+
+      - name: Collect automated test inventory
+        if: always()
+        run: bash .github/scripts/adversarial-review/collect-test-inventory.sh
+
+      - name: Collect review context
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: bash .github/scripts/adversarial-review/collect-review-context.sh
+
+      - name: Compose review prompt
+        id: compose_prompt
+        run: bash .github/scripts/adversarial-review/compose-review-prompt.sh  # sets the `prompt` output consumed by the next step
+
+      - name: Run adversarial review
+        id: review_agent
+        continue-on-error: true  # validity of the response is judged by the validation step below
+        uses: cv/pi-action@main
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          output_mode: output
+          allowed_associations: OWNER,MEMBER,COLLABORATOR
+          prompt: ${{ steps.compose_prompt.outputs.prompt }}
+          pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}
+          timeout: '1800'
+          share_session: true
+          provider: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER }}
+          model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }}
+          api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }}
+          provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }}
+          provider_api: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_API }}
+          model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }}
+          model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }}
+          model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }}
+          model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }}
+          model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }}
+        env:
+          NPM_CONFIG_IGNORE_SCRIPTS: 'true'
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+
+      - name: Validate initial agent response
+        id: validate_initial_response
+        if: always()
+        env:
+          AGENT_RESPONSE: ${{ steps.review_agent.outputs.response }}
+        run: bash .github/scripts/adversarial-review/validate-initial-agent-response.sh  # emits the `valid` and `reason` outputs
+
+      - name: Compose retry review prompt
+        id: compose_retry_prompt
+        if: ${{ always() && steps.validate_initial_response.outputs.valid != 'true' }}
+        env:
+          RETRY_REASON: ${{ steps.validate_initial_response.outputs.reason }}
+        run: bash .github/scripts/adversarial-review/compose-retry-review-prompt.sh
+
+      - name: Retry adversarial review if structured result is missing
+        id: review_agent_retry
+        if: ${{ always() && steps.validate_initial_response.outputs.valid != 'true' }}
+        continue-on-error: true
+        uses: cv/pi-action@main
+        with:  # same inputs as the initial attempt except for the retry prompt
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          output_mode: output
+          allowed_associations: OWNER,MEMBER,COLLABORATOR
+          prompt: ${{ steps.compose_retry_prompt.outputs.prompt }}
+          pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}
+          timeout: '1800'
+          share_session: true
+          provider: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER }}
+          model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }}
+          api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }}
+          provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }}
+          provider_api: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_API }}
+          model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }}
+          model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }}
+          model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }}
+          model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }}
+          model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }}
+        env:
+          NPM_CONFIG_IGNORE_SCRIPTS: 'true'
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+
+      - name: Persist agent response
+        if: always()
+        env:  # both attempts are passed in; the script selects the retry unless INITIAL_VALID is 'true'
+          INITIAL_VALID: ${{ steps.validate_initial_response.outputs.valid }}
+          INITIAL_RESPONSE: ${{ steps.review_agent.outputs.response }}
+          INITIAL_SUCCESS: ${{ steps.review_agent.outputs.success }}
+          INITIAL_SHARE_URL: ${{ steps.review_agent.outputs.share_url }}
+          RETRY_RESPONSE: ${{ steps.review_agent_retry.outputs.response }}
+          RETRY_SUCCESS: ${{ steps.review_agent_retry.outputs.success }}
+          RETRY_SHARE_URL: ${{ steps.review_agent_retry.outputs.share_url }}
+        run: bash .github/scripts/adversarial-review/persist-agent-response.sh
+
+      - name: Render review summary
+        if: always()
+        run: bash .github/scripts/adversarial-review/render-review-summary.sh
+
+      - name: Upload review artifacts
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: adversarial-review-${{ github.run_id }}-${{ github.run_attempt }}
+          path: review-artifacts/**
+          if-no-files-found: warn
+          retention-days: 14
+
+      - name: Post or update sticky PR comment
+        if: ${{ always() && env.PR_NUMBER != '' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          ADVERSARIAL_REVIEW_POST_COMMENTS: ${{ vars.ADVERSARIAL_REVIEW_POST_COMMENTS }}
+        run: bash .github/scripts/adversarial-review/post-sticky-pr-comment.sh  # exits early unless comments are enabled by the variable or the dispatch input
+
+      - name: Enforce adversarial review recommendation
+        if: always()
+        run: bash .github/scripts/adversarial-review/enforce-recommendation.sh  # fails the job unless the recommendation is PASS