From c82c6e580342f9106b87cd0533231b9daad2583a Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sun, 3 May 2026 12:05:25 -0700
Subject: [PATCH] Add adversarial review workflow

---
 .github/prompts/adversarial-review.md         |  80 +++++++
 .../build-baseline-binary.sh                  |   8 +
 .../checkout-workflow-dispatch-pr.sh          |   7 +
 .../collect-review-context.sh                 |  26 +++
 .../collect-test-inventory.sh                 |  36 +++
 .../compose-retry-review-prompt.sh            |  17 ++
 .../compose-review-prompt.sh                  |  30 +++
 .../enforce-recommendation.sh                 |  36 +++
 .../install-os-dependencies.sh                |   5 +
 .../persist-agent-response.sh                 |  26 +++
 .../post-sticky-pr-comment.sh                 |  26 +++
 .../render-review-summary.sh                  |  11 +
 .../adversarial-review/run-baseline-tests.sh  |   8 +
 .../validate-initial-agent-response.sh        |  26 +++
 .../render-adversarial-review-summary.py      | 198 +++++++++++++++++
 .github/workflows/adversarial-review.yml      | 209 ++++++++++++++++++
 16 files changed, 749 insertions(+)
 create mode 100644 .github/prompts/adversarial-review.md
 create mode 100755 .github/scripts/adversarial-review/build-baseline-binary.sh
 create mode 100755 .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
 create mode 100755 .github/scripts/adversarial-review/collect-review-context.sh
 create mode 100755 .github/scripts/adversarial-review/collect-test-inventory.sh
 create mode 100755 .github/scripts/adversarial-review/compose-retry-review-prompt.sh
 create mode 100755 .github/scripts/adversarial-review/compose-review-prompt.sh
 create mode 100755 .github/scripts/adversarial-review/enforce-recommendation.sh
 create mode 100755 .github/scripts/adversarial-review/install-os-dependencies.sh
 create mode 100755 .github/scripts/adversarial-review/persist-agent-response.sh
 create mode 100755 .github/scripts/adversarial-review/post-sticky-pr-comment.sh
 create mode 100755 .github/scripts/adversarial-review/render-review-summary.sh
 create mode 100755 .github/scripts/adversarial-review/run-baseline-tests.sh
 create mode 100755 .github/scripts/adversarial-review/validate-initial-agent-response.sh
 create mode 100644 .github/scripts/render-adversarial-review-summary.py
 create mode 100644 .github/workflows/adversarial-review.yml

diff --git a/.github/prompts/adversarial-review.md b/.github/prompts/adversarial-review.md
new file mode 100644
index 0000000..a4948b3
--- /dev/null
+++ b/.github/prompts/adversarial-review.md
@@ -0,0 +1,80 @@
+# Bash AST adversarial review
+
+You are an adversarial reviewer for `bash-ast`, a Rust CLI/library that uses GNU Bash's real parser through FFI to parse shell scripts into JSON AST and convert JSON AST back to bash.
+
+Your job is to add value beyond ordinary CI. Do not simply rerun the full test suite as your main contribution; the workflow has already captured baseline build/test logs for you. Instead, inspect the repository and the supplied context, identify parser behaviors worth challenging, and run a small number of targeted probes.
+
+## What to inspect first
+
+- `README.md`, `Cargo.toml`, `src/`, and relevant tests under `tests/`.
+- `review-artifacts/pr-context.json` if present.
+- `review-artifacts/base-diff.stat` and `review-artifacts/base-diff.patch` if present.
+- `review-artifacts/build.log`, `review-artifacts/baseline-tests.log`, and status files if present.
+- `review-artifacts/baseline-test-inventory.md` and `review-artifacts/baseline-test-list.txt` for the automated tests that were already enumerated after the baseline test run.
+
+Before planning probes, inspect the automated test inventory so you do not duplicate existing coverage or claim a gap that is already covered by a listed test. If this run is associated with a PR, extract 2-4 concrete, testable claims from the PR title/body/diff before running probes. If there is no PR context, pick high-risk parser/round-trip behaviors from the current checkout.
+
+## Probe guidance
+
+Prefer edge cases involving one or more of:
+
+- nested quotes and escaped newlines;
+- command substitution and arithmetic expansion;
+- heredocs and here-strings;
+- process substitution;
+- pipelines, negated pipelines, and lists;
+- arrays and parameter expansion;
+- case/select/for/while/function syntax;
+- malformed syntax and graceful error handling;
+- parse-to-JSON then `--to-bash` round trips.
+
+For each probe:
+
+1. Create temporary scripts/data only under `/tmp` or `review-artifacts/agent-probes/`.
+2. Use the repository's actual binary/library/test harness whenever practical. The built CLI is usually `target/debug/bash-ast` after `cargo build`.
+3. Capture concise evidence. If output is long, write full logs to `review-artifacts/agent-probes/` and summarize the relevant lines.
+4. Decide whether the observed behavior supports or refutes the hypothesis.
+
+## Constraints
+
+- Do not modify repository source, tests, manifests, lockfiles, generated snapshots, or submodules.
+- Do not install arbitrary dependencies.
+- Do not run broad/unbounded commands that dump huge files or recursive listings.
+- Do not use network access except GitHub context already provided by the workflow.
+- Keep shell commands and outputs in the final response compact.
+- In each `unitTestRecommendation`, distinguish between existing automated coverage you saw in the test inventory and any new coverage you believe should be added.
+- If setup/build failures prevent runtime probes, perform source-level inspection and report `INVESTIGATE` with the best concrete blocker evidence.
+
+## Required final response format
+
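+Return a concise human-readable review followed by a machine-readable JSON block between the exact markers shown below. Write each marker alone on its own line as bare text (no backticks or other decoration), because the workflow only recognizes a marker when it is the sole content of its line: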
+
+`JSON_RESULT_START`
+
+```json
+{
+  "recommendation": "PASS|FAIL|INVESTIGATE",
+  "why": "One or two sentences explaining the recommendation and highest risk.",
+  "tests": [
+    {
+      "title": "Short name",
+      "hypothesis": "What behavior was being tested",
+      "impact": "Why this matters if wrong",
+      "command": "Short command summary, not a giant script",
+      "output": "Concise observed output or pointer to artifact path",
+      "result": "PASS|FAIL",
+      "unitTestRecommendation": "What automated coverage should be added or why existing coverage is enough"
+    }
+  ],
+  "finalMessage": "Brief operator-facing summary"
+}
+```
+
+`JSON_RESULT_END`
+
+Rules for the JSON block:
+
+- `recommendation` must be exactly `PASS`, `FAIL`, or `INVESTIGATE`.
+- `tests` must contain at least one substantive probe or one clearly labeled blocker probe.
+- Every test object must have non-empty string fields: `title`, `hypothesis`, `impact`, `command`, `output`, `result`, and `unitTestRecommendation`.
+- Per-test `result` must be exactly `PASS` or `FAIL`.
diff --git a/.github/scripts/adversarial-review/build-baseline-binary.sh b/.github/scripts/adversarial-review/build-baseline-binary.sh
new file mode 100755
index 0000000..082369e
--- /dev/null
+++ b/.github/scripts/adversarial-review/build-baseline-binary.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set +e  # keep going after a failed build so its exit code can still be recorded
+# Build the crate, mirroring the output to build.log and the exit code to build-status.txt.
+mkdir -p review-artifacts
+cargo build --verbose 2>&1 | tee review-artifacts/build.log
+build_rc=${PIPESTATUS[0]}  # status of cargo, not of tee
+printf '%s\n' "$build_rc" > review-artifacts/build-status.txt
+exit "$build_rc"
diff --git a/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh b/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
new file mode 100755
index 0000000..b276dba
--- /dev/null
+++ b/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Check out the requested PR, then publish its head SHA and base branch name through GITHUB_ENV.
+gh pr checkout "$REQUESTED_PR"
+git submodule update --init --recursive  # re-sync submodules for the newly checked-out head
+echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
+echo "BASE_REF=$(gh pr view "$REQUESTED_PR" --json baseRefName --jq .baseRefName)" >> "$GITHUB_ENV"
diff --git a/.github/scripts/adversarial-review/collect-review-context.sh b/.github/scripts/adversarial-review/collect-review-context.sh
new file mode 100755
index 0000000..fad3e3b
--- /dev/null
+++ b/.github/scripts/adversarial-review/collect-review-context.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Gather git state, the diff against the base branch, and PR metadata into review-artifacts/.
+mkdir -p review-artifacts/agent-probes
+git status --short > review-artifacts/git-status.txt
+git log --oneline -n 20 > review-artifacts/recent-commits.txt
+cargo metadata --no-deps --format-version 1 > review-artifacts/cargo-metadata.json || true
+# Fetch the base branch at full depth: --depth=1 would mark its tip shallow and can hide the merge base used below.
+git fetch origin "$BASE_REF" || true
+if git rev-parse --verify "origin/$BASE_REF" >/dev/null 2>&1; then
+  git diff --stat "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.stat || true
+  git diff --find-renames "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.patch || true
+else
+  : > review-artifacts/base-diff.stat
+  : > review-artifacts/base-diff.patch
+fi
+# PR metadata is best-effort; empty placeholders are written when no PR number is set or gh fails.
+if [ -n "${PR_NUMBER:-}" ]; then
+  gh pr view "$PR_NUMBER" \
+    --json number,title,author,body,baseRefName,headRefName,headRefOid,url,files,comments \
+    > review-artifacts/pr-context.json || echo '{}' > review-artifacts/pr-context.json
+  gh pr diff "$PR_NUMBER" > review-artifacts/pr.diff || true
+else
+  echo '{}' > review-artifacts/pr-context.json
+  : > review-artifacts/pr.diff
+fi
diff --git a/.github/scripts/adversarial-review/collect-test-inventory.sh b/.github/scripts/adversarial-review/collect-test-inventory.sh
new file mode 100755
index 0000000..d6d99a4
--- /dev/null
+++ b/.github/scripts/adversarial-review/collect-test-inventory.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+set +e
+# List the crate's tests with cargo and write a Markdown inventory of the names found.
+mkdir -p review-artifacts
+cargo test -- --list 2>&1 | tee review-artifacts/baseline-test-list.txt
+status=${PIPESTATUS[0]}  # exit code of cargo, not of tee
+echo "$status" > review-artifacts/baseline-test-list-status.txt
+python3 - <<'PY'
+from pathlib import Path
+# Names are taken from listing lines that end in ': test' or ': benchmark'.
+text = Path('review-artifacts/baseline-test-list.txt').read_text(encoding='utf-8', errors='replace')
+tests = sorted({line.strip()[:-len(': test')] for line in text.splitlines() if line.strip().endswith(': test')})
+benches = sorted({line.strip()[:-len(': benchmark')] for line in text.splitlines() if line.strip().endswith(': benchmark')})
+status = Path('review-artifacts/baseline-test-list-status.txt').read_text(encoding='utf-8').strip()
+lines = [
+    '# Automated tests already enumerated',
+    '',
+    f'- Test inventory exit code: `{status}`',
+    f'- Enumerated test count: `{len(tests)}`',
+    f'- Enumerated benchmark count: `{len(benches)}`',
+    '',
+    'The baseline workflow already ran `cargo test --verbose -- --test-threads=1` before this inventory was collected.',
+    'Use this inventory to avoid duplicating existing automated coverage in adversarial probes and recommendations.',
+    '',
+    '## Test names',
+]
+max_names = 250  # cap on the number of test names copied into the Markdown inventory
+lines.extend(f'- `{name}`' for name in tests[:max_names])
+if len(tests) > max_names:
+    lines.append(f'- ... truncated {len(tests) - max_names} additional tests; see `baseline-test-list.txt` for the full list.')
+if benches:
+    lines.extend(['', '## Benchmark names'])
+    lines.extend(f'- `{name}`' for name in benches[:50])  # at most 50 benchmark names are listed
+Path('review-artifacts/baseline-test-inventory.md').write_text('\n'.join(lines) + '\n', encoding='utf-8')
+PY
+exit 0  # always report success, whatever the listing's exit code was
diff --git a/.github/scripts/adversarial-review/compose-retry-review-prompt.sh b/.github/scripts/adversarial-review/compose-retry-review-prompt.sh
new file mode 100755
index 0000000..2c1bffb
--- /dev/null
+++ b/.github/scripts/adversarial-review/compose-retry-review-prompt.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Write review-prompt-retry.md (original prompt plus a retry instruction) and emit it as the `prompt` output.
+delimiter="RETRY_REVIEW_PROMPT_$(date +%s)_$$"  # built from the epoch time and this shell's PID
+{
+  cat review-artifacts/review-prompt.md
+  echo
+  echo "## Retry instruction"
+  echo
+  echo "The previous adversarial-review attempt did not produce a valid structured result. Reason: ${RETRY_REASON:-unknown}."
+  echo "Retry the review now. You must end with exact line markers JSON_RESULT_START and JSON_RESULT_END, with one valid JSON object between them and no nested marker text inside JSON strings. The JSON must satisfy the required schema, including a non-empty tests array."
+} > review-artifacts/review-prompt-retry.md
+{
+  echo "prompt<<$delimiter"
+  cat review-artifacts/review-prompt-retry.md
+  echo "$delimiter"
+} >> "$GITHUB_OUTPUT"
diff --git a/.github/scripts/adversarial-review/compose-review-prompt.sh b/.github/scripts/adversarial-review/compose-review-prompt.sh
new file mode 100755
index 0000000..4902a57
--- /dev/null
+++ b/.github/scripts/adversarial-review/compose-review-prompt.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Write review-prompt.md (base prompt, run context, optional test inventory) and emit it as the `prompt` output.
+delimiter="REVIEW_PROMPT_$(date +%s)_$$"  # built from the epoch time and this shell's PID
+{
+  cat .github/prompts/adversarial-review.md
+  echo
+  echo "## Workflow-provided context"
+  echo
+  echo "- Repository: $GITHUB_REPOSITORY"
+  echo "- Event: $GITHUB_EVENT_NAME"
+  echo "- PR number: ${PR_NUMBER:-none}"
+  echo "- Base ref: ${BASE_REF:-unknown}"
+  echo "- Head SHA: ${HEAD_SHA:-unknown}"
+  echo "- Build exit code: $(cat review-artifacts/build-status.txt 2>/dev/null || echo unknown)"
+  echo "- Baseline test exit code: $(cat review-artifacts/baseline-test-status.txt 2>/dev/null || echo unknown)"
+  echo "- Baseline test inventory exit code: $(cat review-artifacts/baseline-test-list-status.txt 2>/dev/null || echo unknown)"
+  echo
+  echo "Artifacts are available under ./review-artifacts/. Keep any additional probe artifacts under ./review-artifacts/agent-probes/."
+  if [ -f review-artifacts/baseline-test-inventory.md ]; then
+    echo
+    echo "## Automated tests already run/enumerated"
+    cat review-artifacts/baseline-test-inventory.md
+  fi
+} > review-artifacts/review-prompt.md
+{
+  echo "prompt<<$delimiter"
+  cat review-artifacts/review-prompt.md
+  echo "$delimiter"
+} >> "$GITHUB_OUTPUT"
diff --git a/.github/scripts/adversarial-review/enforce-recommendation.sh b/.github/scripts/adversarial-review/enforce-recommendation.sh
new file mode 100755
index 0000000..a4698d6
--- /dev/null
+++ b/.github/scripts/adversarial-review/enforce-recommendation.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Exit non-zero unless the stored agent response holds a valid structured review recommending PASS.
+python3 - <<'PY'
+import importlib.util
+import sys
+from pathlib import Path
+# The renderer's file name contains hyphens, so it is loaded by path rather than imported by name.
+module_path = Path('.github/scripts/render-adversarial-review-summary.py')
+spec = importlib.util.spec_from_file_location('review_summary', module_path)
+if spec is None or spec.loader is None:
+    print(f'::error::Could not load review summary parser from {module_path}')
+    sys.exit(1)
+# Execute the module so that extract_json_blob and validate_review become available.
+review_summary = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(review_summary)
+# A missing response file is treated as an empty response.
+response_path = Path('review-artifacts/agent-response.md')
+response = response_path.read_text(encoding='utf-8', errors='replace') if response_path.exists() else ''
+review, warning = review_summary.extract_json_blob(response)
+if review is None:
+    print(f'::error::Adversarial review did not produce a valid structured recommendation: {warning}')
+    sys.exit(1)
+# Each schema violation is reported as its own error annotation.
+validation_errors = review_summary.validate_review(review)
+if validation_errors:
+    for error in validation_errors:
+        print(f'::error::Invalid adversarial review result: {error}')
+    sys.exit(1)
+# Any recommendation other than PASS (i.e. FAIL or INVESTIGATE) fails the step.
+recommendation = str(review.get('recommendation', '')).strip()
+print(f'Adversarial review recommendation: {recommendation}')
+if recommendation != 'PASS':
+    print(f'::error::Adversarial review recommendation is {recommendation}; failing the check so the PR is not mergeable as-is.')
+    sys.exit(1)
+PY
diff --git a/.github/scripts/adversarial-review/install-os-dependencies.sh b/.github/scripts/adversarial-review/install-os-dependencies.sh
new file mode 100755
index 0000000..deeaf9e
--- /dev/null
+++ b/.github/scripts/adversarial-review/install-os-dependencies.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Refresh the apt package index, then install libncurses-dev without prompting.
+sudo apt-get update
+sudo apt-get install -y libncurses-dev
diff --git a/.github/scripts/adversarial-review/persist-agent-response.sh b/.github/scripts/adversarial-review/persist-agent-response.sh
new file mode 100755
index 0000000..74dd1ba
--- /dev/null
+++ b/.github/scripts/adversarial-review/persist-agent-response.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Save the selected agent attempt (initial when INITIAL_VALID is "true", otherwise retry) plus metadata.
+mkdir -p review-artifacts
+python3 - <<'PY'
+import json
+import os
+from pathlib import Path
+
+env = os.environ
+artifacts = Path('review-artifacts')
+attempt = 'initial' if env.get('INITIAL_VALID') == 'true' else 'retry'
+prefix = attempt.upper()  # selects the INITIAL_* or RETRY_* environment variables
+response = env.get(f'{prefix}_RESPONSE', '')
+(artifacts / 'agent-response.md').write_text(response, encoding='utf-8')
+if attempt == 'retry':
+    # The retry text is additionally stored under its own file name.
+    (artifacts / 'agent-response-retry.md').write_text(response, encoding='utf-8')
+metadata = {
+    'selected_attempt': attempt,
+    'initial_valid': env.get('INITIAL_VALID', ''),
+    'success': env.get(f'{prefix}_SUCCESS', ''),
+    'share_url': env.get(f'{prefix}_SHARE_URL', ''),
+}
+(artifacts / 'agent-action-metadata.json').write_text(json.dumps(metadata, indent=2) + '\n', encoding='utf-8')
+PY
diff --git a/.github/scripts/adversarial-review/post-sticky-pr-comment.sh b/.github/scripts/adversarial-review/post-sticky-pr-comment.sh
new file mode 100755
index 0000000..536909b
--- /dev/null
+++ b/.github/scripts/adversarial-review/post-sticky-pr-comment.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Create or update one marker-tagged PR comment containing the rendered review summary (opt-in).
+post_comment_input=$(jq -r '.inputs.post_comment // "false"' "$GITHUB_EVENT_PATH")
+if [ "${ADVERSARIAL_REVIEW_POST_COMMENTS:-}" != "true" ] && [ "$post_comment_input" != "true" ]; then
+  echo "Sticky PR comment disabled; set ADVERSARIAL_REVIEW_POST_COMMENTS=true or workflow_dispatch post_comment=true to enable."
+  exit 0
+fi
+# Runs dispatched for a bare ref have no PR; skip instead of calling the API with an empty number.
+[ -n "${PR_NUMBER:-}" ] || { echo "No pull request number available; skipping sticky PR comment."; exit 0; }
+marker='<!-- adversarial-review:bash-ast -->'
+body_file=$(mktemp)
+{
+  echo "$marker"
+  echo "<!-- head_sha: ${HEAD_SHA:-unknown}; run_id: $GITHUB_RUN_ID; run_attempt: $GITHUB_RUN_ATTEMPT -->"
+  echo
+  cat review-artifacts/adversarial-review-summary.md
+} > "$body_file"
+# Reuse the most recent comment whose body contains the marker, if there is one.
+comment_id=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --paginate \
+  --jq ".[] | select(.body | contains(\"$marker\")) | .id" | tail -n 1)
+if [ -n "$comment_id" ]; then
+  gh api -X PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" -F "body=@$body_file" >/dev/null
+else
+  gh pr comment "$PR_NUMBER" --body-file "$body_file"
+fi
diff --git a/.github/scripts/adversarial-review/render-review-summary.sh b/.github/scripts/adversarial-review/render-review-summary.sh
new file mode 100755
index 0000000..4151a10
--- /dev/null
+++ b/.github/scripts/adversarial-review/render-review-summary.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+# Render the Markdown summary from the stored response, logs and status files, then append it to the step summary.
+python3 .github/scripts/render-adversarial-review-summary.py \
+  --response review-artifacts/agent-response.md \
+  --build-log review-artifacts/build.log \
+  --baseline-log review-artifacts/baseline-tests.log \
+  --build-status review-artifacts/build-status.txt \
+  --baseline-status review-artifacts/baseline-test-status.txt \
+  --output review-artifacts/adversarial-review-summary.md
+cat review-artifacts/adversarial-review-summary.md >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/scripts/adversarial-review/run-baseline-tests.sh b/.github/scripts/adversarial-review/run-baseline-tests.sh
new file mode 100755
index 0000000..d5b6eef
--- /dev/null
+++ b/.github/scripts/adversarial-review/run-baseline-tests.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set +e  # keep going after failing tests so the exit code can still be recorded
+# Run the test suite single-threaded, mirroring output to baseline-tests.log and the exit code to a status file.
+mkdir -p review-artifacts
+cargo test --verbose -- --test-threads=1 2>&1 | tee review-artifacts/baseline-tests.log
+tests_rc=${PIPESTATUS[0]}  # status of cargo, not of tee
+printf '%s\n' "$tests_rc" > review-artifacts/baseline-test-status.txt
+exit "$tests_rc"
diff --git a/.github/scripts/adversarial-review/validate-initial-agent-response.sh b/.github/scripts/adversarial-review/validate-initial-agent-response.sh
new file mode 100755
index 0000000..d9561da
--- /dev/null
+++ b/.github/scripts/adversarial-review/validate-initial-agent-response.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Store the first agent response and emit `valid` / `reason` step outputs describing its structured result.
+mkdir -p review-artifacts
+python3 - <<'PY' >> "$GITHUB_OUTPUT"
+import importlib.util
+import os
+from pathlib import Path
+
+Path('review-artifacts/agent-response-initial.md').write_text(os.environ.get('AGENT_RESPONSE', ''), encoding='utf-8')
+spec = importlib.util.spec_from_file_location('review_summary', '.github/scripts/render-adversarial-review-summary.py')
+review_summary = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(review_summary)
+review, warning = review_summary.extract_json_blob(os.environ.get('AGENT_RESPONSE', ''))
+errors = []
+if review is None:
+    errors.append(warning or 'missing structured review result')
+else:
+    errors.extend(review_summary.validate_review(review))
+if errors:  # stdout of this script is appended to GITHUB_OUTPUT, so each print below is one output line
+    print('valid=false')
+    print(f"reason={' ; '.join(errors)}")
+else:
+    print('valid=true')
+    print('reason=valid structured review result')
+PY
diff --git a/.github/scripts/render-adversarial-review-summary.py b/.github/scripts/render-adversarial-review-summary.py
new file mode 100644
index 0000000..2dfb254
--- /dev/null
+++ b/.github/scripts/render-adversarial-review-summary.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""Render a GitHub Actions summary from agent adversarial review output."""
+
+from __future__ import annotations
+
+import argparse
+import html
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+
+def read_text(path: Path | None) -> str:  # "" when no path is given or the path does not exist
+    if path is not None and path.exists():
+        return path.read_text(encoding="utf-8", errors="replace")
+    return ""
+
+
+def extract_json_blob(response: str) -> tuple[dict[str, Any] | None, str | None]:
+    """Return ``(review, None)`` parsed from the marker-delimited JSON block, or ``(None, reason)``.
+
+    Each marker must be the only content of its line; surrounding backticks are tolerated because
+    the prompt template displays the markers as inline code and a response may echo that form.
+    """
+    marker_pattern = r"^\s*`?JSON_RESULT_START`?\s*$\s*(.*?)^\s*`?JSON_RESULT_END`?\s*$"
+    marker_match = re.search(marker_pattern, response, flags=re.IGNORECASE | re.DOTALL | re.MULTILINE)
+    if not marker_match:
+        return None, "Could not find JSON_RESULT_START / JSON_RESULT_END markers."
+    candidate = marker_match.group(1).strip()
+    # An optional Markdown code fence around the JSON is removed before decoding.
+    fence_match = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", candidate, flags=re.IGNORECASE | re.DOTALL)
+    if fence_match:
+        candidate = fence_match.group(1).strip()
+    try:
+        parsed = json.loads(candidate)
+    except json.JSONDecodeError as error:
+        return None, f"Could not parse JSON review block between JSON_RESULT_START and JSON_RESULT_END: {error}"
+    if not isinstance(parsed, dict):
+        return None, "Structured review JSON must be an object."
+    return parsed, None
+
+
+def validate_review(review: dict[str, Any]) -> list[str]:
+    """Return every schema violation found in ``review``; an empty list means it is valid."""
+
+    def is_blank(value: Any) -> bool:
+        return not (isinstance(value, str) and value.strip())
+
+    problems: list[str] = []
+    if review.get("recommendation") not in {"PASS", "FAIL", "INVESTIGATE"}:
+        problems.append("recommendation must be PASS, FAIL, or INVESTIGATE")
+    problems += [f"{key} must be a non-empty string" for key in ("why", "finalMessage") if is_blank(review.get(key))]
+
+    probes = review.get("tests")
+    if not (isinstance(probes, list) and probes):
+        # Nothing to inspect per probe, so report what has been found so far.
+        return problems + ["tests must be a non-empty array"]
+
+    probe_fields = ("title", "hypothesis", "impact", "command", "output", "result", "unitTestRecommendation")
+    for position, probe in enumerate(probes, start=1):
+        if not isinstance(probe, dict):
+            problems.append(f"tests[{position}] must be an object")
+            continue
+        problems += [f"tests[{position}].{key} must be a non-empty string" for key in probe_fields if is_blank(probe.get(key))]
+        # Beyond being non-blank, result has to be one of the two allowed verdicts.
+        if probe.get("result") not in {"PASS", "FAIL"}:
+            problems.append(f"tests[{position}].result must be PASS or FAIL")
+
+    return problems
+
+
+def normalize_tests(value: Any) -> list[dict[str, Any]]:  # dict entries of a list; [] for any non-list
+    if isinstance(value, list):
+        return [entry for entry in value if isinstance(entry, dict)]
+    return []
+
+
+def markdown_text(value: Any) -> str:  # stringify, then HTML-escape &, < and > (quotes are left as-is)
+    return html.escape(str(value), quote=False)
+
+
+def fenced_text(value: Any) -> str:  # fence bodies render literally, so only break up ``` runs; no HTML escaping
+    return str(value).replace("```", "`\\`\\`")
+
+
+def append_log_tail(lines: list[str], title: str, text: str, max_lines: int = 60) -> None:
+    """Append a fenced section with the last ``max_lines`` lines of ``text``; no-op when it is blank."""
+    # The workflow forces CARGO_TERM_COLOR=always and captures logs through tee, so they carry
+    # ANSI CSI colour sequences that would render as noise in Markdown; strip them first.
+    text = re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text)
+    if not text.strip():
+        return
+    tail = "\n".join(text.rstrip().splitlines()[-max_lines:])
+    lines.extend([
+        f"### {title}", "",
+        "```text", tail, "```", "",
+    ])
+
+
+def render_summary(args: argparse.Namespace) -> str:  # build the Markdown summary from the files named in args
+    response = read_text(args.response)
+    build_log = read_text(args.build_log)
+    baseline_log = read_text(args.baseline_log)
+    build_status = read_text(args.build_status).strip() if args.build_status and args.build_status.exists() else "unknown"
+    baseline_status = read_text(args.baseline_status).strip() if args.baseline_status and args.baseline_status.exists() else "unknown"
+    review, warning = extract_json_blob(response)
+    # Header bullets carrying the build and baseline-test exit codes read above.
+    lines: list[str] = [
+        "## Adversarial review",
+        "",
+        f"- **Build exit code:** `{build_status or 'unknown'}`",
+        f"- **Baseline test exit code:** `{baseline_status or 'unknown'}`",
+    ]
+    # Without a parseable JSON block, report UNKNOWN together with the extraction warning.
+    if review is None:
+        lines.extend([
+            "- **Recommendation:** `UNKNOWN`",
+            "",
+            f"> ⚠️ {warning}",
+            "",
+        ])
+    else:
+        recommendation = str(review.get("recommendation", "UNKNOWN"))
+        why = str(review.get("why", "No rationale supplied."))
+        final_message = str(review.get("finalMessage", ""))
+        tests = normalize_tests(review.get("tests"))
+        validation_errors = validate_review(review)
+        # Agent-supplied values pass through markdown_text before being placed in the summary.
+        lines.extend([
+            f"- **Recommendation:** `{markdown_text(recommendation)}`",
+            f"- **Why:** {markdown_text(why)}",
+        ])
+        if final_message:
+            lines.append(f"- **Final message:** {markdown_text(final_message)}")
+        if validation_errors:
+            lines.extend(["", "### Structured review validation errors", ""])
+            lines.extend(f"- {error}" for error in validation_errors)
+        lines.extend(["", "### Structured probes", ""])
+        # One subsection per probe; the command is shown inside a fenced block.
+        if not tests:
+            lines.extend(["No structured probes were parsed from the agent response.", ""])
+        else:
+            for index, test in enumerate(tests, start=1):
+                title = str(test.get("title", f"Probe {index}"))
+                result = str(test.get("result", "UNKNOWN"))
+                lines.extend([
+                    f"#### {index}. {markdown_text(title)} — `{markdown_text(result)}`",
+                    "",
+                    f"- **Hypothesis:** {markdown_text(test.get('hypothesis', ''))}",
+                    f"- **Impact:** {markdown_text(test.get('impact', ''))}",
+                    "- **Command:**",
+                    "",
+                    "```text",
+                    fenced_text(test.get('command', '')),
+                    "```",
+                    "",
+                    f"- **Output:** {markdown_text(test.get('output', ''))}",
+                    f"- **Coverage recommendation:** {markdown_text(test.get('unitTestRecommendation', ''))}",
+                    "",
+                ])
+    # Raw response (its last 12000 characters at most) inside a collapsible <details> element.
+    lines.extend([
+        "### Full agent response",
+        "",
+        "<details>",
+        "<summary>Expand raw response</summary>",
+        "",
+        "````text",
+        fenced_text(response[-12000:]) if response else "(no agent response captured)",
+        "````",
+        "",
+        "</details>",
+        "",
+    ])
+    # Log tails are appended only when the corresponding log text is non-blank.
+    append_log_tail(lines, "Build log tail", build_log)
+    append_log_tail(lines, "Baseline test log tail", baseline_log)
+    # Assemble the accumulated lines into one Markdown document.
+    return "\n".join(lines)
+
+
+def main() -> None:
+    """Parse the command-line paths, render the summary, and write it to ``--output``."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--response", type=Path, required=True)
+    for optional_flag in ("--build-log", "--baseline-log", "--build-status", "--baseline-status"):
+        parser.add_argument(optional_flag, type=Path)
+    parser.add_argument("--output", type=Path, required=True)
+    args = parser.parse_args()
+
+    destination: Path = args.output
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    destination.write_text(render_summary(args), encoding="utf-8")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/adversarial-review.yml b/.github/workflows/adversarial-review.yml
new file mode 100644
index 0000000..a4c647f
--- /dev/null
+++ b/.github/workflows/adversarial-review.yml
@@ -0,0 +1,209 @@
+name: Adversarial review
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [main]  # only pull requests targeting main
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'Optional pull request number to review'
+        default: ''
+        required: false
+      ref:
+        description: 'Optional ref, branch, or SHA to review when pr_number is empty'
+        default: ''
+        required: false
+      post_comment:
+        description: 'Post or update the sticky PR review comment'
+        type: boolean
+        default: false
+        required: false
+
+permissions:
+  contents: read
+  pull-requests: write  # needed to create/update the sticky comment on a PR; issues: write covers plain issues only (verify)
+  issues: write
+  actions: read
+
+concurrency:
+  cancel-in-progress: false  # a new run for the same target waits instead of cancelling the running one
+  group: "adversarial-review-${{ github.workflow }}-${{ github.event.pull_request.number || github.event.inputs.pr_number || github.event.inputs.ref || github.sha }}"
+
+env:
+  HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"  # falls back to github.sha outside pull_request events
+  BASE_REF: "${{ github.event.pull_request.base.ref || 'main' }}"  # falls back to main outside pull_request events
+  PR_NUMBER: "${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}"  # empty when no PR is targeted
+  CARGO_TERM_COLOR: always
+
+jobs:
+  adversarial-review:
+    name: Adversarial review
+    # pull_request runs only for same-repository branches so repository/model secrets are not exposed to forked PR code.
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 90  # must fit setup/build/tests plus two agent attempts at timeout '1800' each (presumably seconds; confirm)
+
+    steps:
+      - name: Checkout target
+        uses: actions/checkout@v6.0.2
+        with:
+          ref: "${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.event.inputs.ref || github.sha }}"
+          fetch-depth: 0  # 0 fetches the complete history instead of a shallow clone
+          submodules: recursive
+
+      - name: Checkout workflow_dispatch PR
+        if: github.event_name == 'workflow_dispatch' && github.event.inputs.pr_number != ''
+        run: bash .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh
+        env:
+          REQUESTED_PR: "${{ github.event.inputs.pr_number }}"
+          GH_TOKEN: "${{ github.token }}"
+
+      - name: Install OS dependencies
+        run: bash .github/scripts/adversarial-review/install-os-dependencies.sh
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@v1
+        with:
+          toolchain: 'stable'
+
+      - name: Setup Node.js for agent action
+        uses: actions/setup-node@v6
+        with:
+          node-version: "25"
+
+      - name: Build baseline binary
+        id: build
+        run: bash .github/scripts/adversarial-review/build-baseline-binary.sh
+        continue-on-error: true  # a failed build does not stop the job
+
+      - name: Run baseline tests
+        id: baseline_tests
+        run: bash .github/scripts/adversarial-review/run-baseline-tests.sh
+        continue-on-error: true  # failing baseline tests do not stop the job
+
+      - name: Collect automated test inventory
+        if: ${{ always() }}
+        run: bash .github/scripts/adversarial-review/collect-test-inventory.sh
+
+      - name: Collect review context
+        run: bash .github/scripts/adversarial-review/collect-review-context.sh
+        env:
+          GH_TOKEN: "${{ github.token }}"
+
+      - name: Compose review prompt
+        id: compose_prompt  # its `prompt` output is passed to the review agent step
+        run: bash .github/scripts/adversarial-review/compose-review-prompt.sh
+
+      - name: Run adversarial review
+        id: review_agent
+        uses: cv/pi-action@main  # NOTE(review): floating ref that receives secrets; consider pinning to a commit SHA
+        continue-on-error: true
+        with:
+          prompt: ${{ steps.compose_prompt.outputs.prompt }}
+          pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}
+          github_token: ${{ github.token }}
+          allowed_associations: OWNER,MEMBER,COLLABORATOR
+          output_mode: output
+          share_session: true
+          timeout: "1800"
+          api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }}
+          provider: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER }}
+          provider_api: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_API }}
+          provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }}
+          model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }}
+          model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }}
+          model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }}
+          model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }}
+          model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }}
+          model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }}
+        env:
+          NPM_CONFIG_IGNORE_SCRIPTS: "true"
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+
+      - name: Validate initial agent response
+        id: validate_initial_response
+        if: ${{ always() }}
+        run: bash .github/scripts/adversarial-review/validate-initial-agent-response.sh
+        env:
+          AGENT_RESPONSE: "${{ steps.review_agent.outputs.response }}"
+
+      - name: Compose retry review prompt
+        id: compose_retry_prompt
+        if: always() && steps.validate_initial_response.outputs.valid != 'true'
+        run: bash .github/scripts/adversarial-review/compose-retry-review-prompt.sh
+        env:
+          RETRY_REASON: "${{ steps.validate_initial_response.outputs.reason }}"
+
+      - name: Retry adversarial review if structured result is missing
+        id: review_agent_retry
+        if: always() && steps.validate_initial_response.outputs.valid != 'true'
+        uses: cv/pi-action@main  # NOTE(review): floating ref that receives secrets; consider pinning to a commit SHA
+        continue-on-error: true
+        with:
+          prompt: ${{ steps.compose_retry_prompt.outputs.prompt }}
+          pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }}
+          github_token: ${{ github.token }}
+          allowed_associations: OWNER,MEMBER,COLLABORATOR
+          output_mode: output
+          share_session: true
+          timeout: "1800"
+          api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }}
+          provider: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER }}
+          provider_api: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_API }}
+          provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }}
+          model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }}
+          model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }}
+          model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }}
+          model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }}
+          model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }}
+          model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }}
+        env:
+          NPM_CONFIG_IGNORE_SCRIPTS: "true"
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+
+      - name: Persist agent response
+        if: ${{ always() }}
+        run: bash .github/scripts/adversarial-review/persist-agent-response.sh
+        env:
+          INITIAL_VALID: ${{ steps.validate_initial_response.outputs.valid }}
+          INITIAL_SUCCESS: ${{ steps.review_agent.outputs.success }}
+          INITIAL_RESPONSE: ${{ steps.review_agent.outputs.response }}
+          INITIAL_SHARE_URL: ${{ steps.review_agent.outputs.share_url }}
+          RETRY_SUCCESS: ${{ steps.review_agent_retry.outputs.success }}
+          RETRY_RESPONSE: ${{ steps.review_agent_retry.outputs.response }}
+          RETRY_SHARE_URL: ${{ steps.review_agent_retry.outputs.share_url }}
+
+      - name: Render review summary
+        if: ${{ always() }}
+        run: bash .github/scripts/adversarial-review/render-review-summary.sh
+
+      - name: Upload review artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v7
+        with:
+          name: adversarial-review-${{ github.run_id }}-${{ github.run_attempt }}
+          path: 'review-artifacts/**'
+          retention-days: 14
+          if-no-files-found: warn
+
+      - name: Post or update sticky PR comment
+        if: always() && env.PR_NUMBER != ''
+        run: bash .github/scripts/adversarial-review/post-sticky-pr-comment.sh
+        env:  # NOTE(review): the post_comment dispatch input is not forwarded here; confirm the script reads it elsewhere
+          ADVERSARIAL_REVIEW_POST_COMMENTS: ${{ vars.ADVERSARIAL_REVIEW_POST_COMMENTS }}
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Enforce adversarial review recommendation
+        if: ${{ always() }}
+        run: bash .github/scripts/adversarial-review/enforce-recommendation.sh