From 895b564d78ceb42525d71948a10145537aa4d6d1 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Sun, 3 May 2026 23:06:01 -0700 Subject: [PATCH] ci: use cv/enemy for enemy review --- .../{adversarial-review.md => enemy.md} | 16 +- .../build-baseline-binary.sh | 8 - .../checkout-workflow-dispatch-pr.sh | 7 - .../collect-review-context.sh | 26 --- .../collect-test-inventory.sh | 36 ---- .../compose-retry-review-prompt.sh | 17 -- .../compose-review-prompt.sh | 30 --- .../enforce-recommendation.sh | 36 ---- .../install-os-dependencies.sh | 5 - .../persist-agent-response.sh | 26 --- .../post-sticky-pr-comment.sh | 26 --- .../render-review-summary.sh | 11 - .../adversarial-review/run-baseline-tests.sh | 8 - .../validate-initial-agent-response.sh | 26 --- .../render-adversarial-review-summary.py | 198 ------------------ .github/workflows/adversarial-review.yml | 183 +++------------- 16 files changed, 35 insertions(+), 624 deletions(-) rename .github/prompts/{adversarial-review.md => enemy.md} (81%) delete mode 100755 .github/scripts/adversarial-review/build-baseline-binary.sh delete mode 100755 .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh delete mode 100755 .github/scripts/adversarial-review/collect-review-context.sh delete mode 100755 .github/scripts/adversarial-review/collect-test-inventory.sh delete mode 100755 .github/scripts/adversarial-review/compose-retry-review-prompt.sh delete mode 100755 .github/scripts/adversarial-review/compose-review-prompt.sh delete mode 100755 .github/scripts/adversarial-review/enforce-recommendation.sh delete mode 100755 .github/scripts/adversarial-review/install-os-dependencies.sh delete mode 100755 .github/scripts/adversarial-review/persist-agent-response.sh delete mode 100755 .github/scripts/adversarial-review/post-sticky-pr-comment.sh delete mode 100755 .github/scripts/adversarial-review/render-review-summary.sh delete mode 100755 .github/scripts/adversarial-review/run-baseline-tests.sh delete mode 100755 
.github/scripts/adversarial-review/validate-initial-agent-response.sh delete mode 100644 .github/scripts/render-adversarial-review-summary.py diff --git a/.github/prompts/adversarial-review.md b/.github/prompts/enemy.md similarity index 81% rename from .github/prompts/adversarial-review.md rename to .github/prompts/enemy.md index a4948b3..52c1188 100644 --- a/.github/prompts/adversarial-review.md +++ b/.github/prompts/enemy.md @@ -1,16 +1,16 @@ -# Bash AST adversarial review +# Bash AST enemy review -You are an adversarial reviewer for `bash-ast`, a Rust CLI/library that uses GNU Bash's real parser through FFI to parse shell scripts into JSON AST and convert JSON AST back to bash. +You are an enemy reviewer for `bash-ast`, a Rust CLI/library that uses GNU Bash's real parser through FFI to parse shell scripts into JSON AST and convert JSON AST back to bash. Your job is to add value beyond ordinary CI. Do not simply rerun the full test suite as your main contribution; the workflow has already captured baseline build/test logs for you. Instead, inspect the repository and the supplied context, identify parser behaviors worth challenging, and run a small number of targeted probes. ## What to inspect first - `README.md`, `Cargo.toml`, `src/`, and relevant tests under `tests/`. -- `review-artifacts/pr-context.json` if present. -- `review-artifacts/base-diff.stat` and `review-artifacts/base-diff.patch` if present. -- `review-artifacts/build.log`, `review-artifacts/baseline-tests.log`, and status files if present. -- `review-artifacts/baseline-test-inventory.md` and `review-artifacts/baseline-test-list.txt` for the automated tests that were already enumerated after the baseline test run. +- `enemy-artifacts/pr-context.json` if present. +- `enemy-artifacts/base-diff.stat` and `enemy-artifacts/base-diff.patch` if present. +- `enemy-artifacts/build.log`, `enemy-artifacts/baseline-tests.log`, and status files if present. 
+- `enemy-artifacts/baseline-test-inventory.md` and `enemy-artifacts/baseline-test-list.txt` for the automated tests that were already enumerated after the baseline test run. Before planning probes, inspect the automated test inventory so you do not duplicate existing coverage or claim a gap that is already covered by a listed test. If this run is associated with a PR, extract 2-4 concrete, testable claims from the PR title/body/diff before running probes. If there is no PR context, pick high-risk parser/round-trip behaviors from the current checkout. @@ -30,9 +30,9 @@ Prefer edge cases involving one or more of: For each probe: -1. Create temporary scripts/data only under `/tmp` or `review-artifacts/agent-probes/`. +1. Create temporary scripts/data only under `/tmp` or `enemy-artifacts/probes/`. 2. Use the repository's actual binary/library/test harness whenever practical. The built CLI is usually `target/debug/bash-ast` after `cargo build`. -3. Capture concise evidence. If output is long, write full logs to `review-artifacts/agent-probes/` and summarize the relevant lines. +3. Capture concise evidence. If output is long, write full logs to `enemy-artifacts/probes/` and summarize the relevant lines. 4. Decide whether the observed behavior supports or refutes the hypothesis. 
## Constraints diff --git a/.github/scripts/adversarial-review/build-baseline-binary.sh b/.github/scripts/adversarial-review/build-baseline-binary.sh deleted file mode 100755 index 082369e..0000000 --- a/.github/scripts/adversarial-review/build-baseline-binary.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set +e - -mkdir -p review-artifacts -cargo build --verbose 2>&1 | tee review-artifacts/build.log -status=${PIPESTATUS[0]} -echo "$status" > review-artifacts/build-status.txt -exit "$status" diff --git a/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh b/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh deleted file mode 100755 index b276dba..0000000 --- a/.github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -set -euxo pipefail - -gh pr checkout "$REQUESTED_PR" -git submodule update --init --recursive -echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV" -echo "BASE_REF=$(gh pr view "$REQUESTED_PR" --json baseRefName --jq .baseRefName)" >> "$GITHUB_ENV" diff --git a/.github/scripts/adversarial-review/collect-review-context.sh b/.github/scripts/adversarial-review/collect-review-context.sh deleted file mode 100755 index fad3e3b..0000000 --- a/.github/scripts/adversarial-review/collect-review-context.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -set -euxo pipefail - -mkdir -p review-artifacts/agent-probes -git status --short > review-artifacts/git-status.txt -git log --oneline -n 20 > review-artifacts/recent-commits.txt -cargo metadata --no-deps --format-version 1 > review-artifacts/cargo-metadata.json || true - -git fetch origin "$BASE_REF" --depth=1 || true -if git rev-parse --verify "origin/$BASE_REF" >/dev/null 2>&1; then - git diff --stat "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.stat || true - git diff --find-renames "origin/$BASE_REF...HEAD" > review-artifacts/base-diff.patch || true -else - : > review-artifacts/base-diff.stat 
- : > review-artifacts/base-diff.patch -fi - -if [ -n "$PR_NUMBER" ]; then - gh pr view "$PR_NUMBER" \ - --json number,title,author,body,baseRefName,headRefName,headRefOid,url,files,comments \ - > review-artifacts/pr-context.json || echo '{}' > review-artifacts/pr-context.json - gh pr diff "$PR_NUMBER" > review-artifacts/pr.diff || true -else - echo '{}' > review-artifacts/pr-context.json - : > review-artifacts/pr.diff -fi diff --git a/.github/scripts/adversarial-review/collect-test-inventory.sh b/.github/scripts/adversarial-review/collect-test-inventory.sh deleted file mode 100755 index d6d99a4..0000000 --- a/.github/scripts/adversarial-review/collect-test-inventory.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -set +e - -mkdir -p review-artifacts -cargo test -- --list 2>&1 | tee review-artifacts/baseline-test-list.txt -status=${PIPESTATUS[0]} -echo "$status" > review-artifacts/baseline-test-list-status.txt -python3 - <<'PY' -from pathlib import Path - -text = Path('review-artifacts/baseline-test-list.txt').read_text(encoding='utf-8', errors='replace') -tests = sorted({line.strip()[:-len(': test')] for line in text.splitlines() if line.strip().endswith(': test')}) -benches = sorted({line.strip()[:-len(': benchmark')] for line in text.splitlines() if line.strip().endswith(': benchmark')}) -status = Path('review-artifacts/baseline-test-list-status.txt').read_text(encoding='utf-8').strip() -lines = [ - '# Automated tests already enumerated', - '', - f'- Test inventory exit code: `{status}`', - f'- Enumerated test count: `{len(tests)}`', - f'- Enumerated benchmark count: `{len(benches)}`', - '', - 'The baseline workflow already ran `cargo test --verbose -- --test-threads=1` before this inventory was collected.', - 'Use this inventory to avoid duplicating existing automated coverage in adversarial probes and recommendations.', - '', - '## Test names', -] -max_names = 250 -lines.extend(f'- `{name}`' for name in tests[:max_names]) -if len(tests) > max_names: - 
lines.append(f'- ... truncated {len(tests) - max_names} additional tests; see `baseline-test-list.txt` for the full list.') -if benches: - lines.extend(['', '## Benchmark names']) - lines.extend(f'- `{name}`' for name in benches[:50]) -Path('review-artifacts/baseline-test-inventory.md').write_text('\n'.join(lines) + '\n', encoding='utf-8') -PY -exit 0 diff --git a/.github/scripts/adversarial-review/compose-retry-review-prompt.sh b/.github/scripts/adversarial-review/compose-retry-review-prompt.sh deleted file mode 100755 index 2c1bffb..0000000 --- a/.github/scripts/adversarial-review/compose-retry-review-prompt.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -delimiter="RETRY_REVIEW_PROMPT_$(date +%s)_$$" -{ - cat review-artifacts/review-prompt.md - echo - echo "## Retry instruction" - echo - echo "The previous adversarial-review attempt did not produce a valid structured result. Reason: ${RETRY_REASON:-unknown}." - echo "Retry the review now. You must end with exact line markers JSON_RESULT_START and JSON_RESULT_END, with one valid JSON object between them and no nested marker text inside JSON strings. The JSON must satisfy the required schema, including a non-empty tests array." 
-} > review-artifacts/review-prompt-retry.md -{ - echo "prompt<<$delimiter" - cat review-artifacts/review-prompt-retry.md - echo "$delimiter" -} >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/adversarial-review/compose-review-prompt.sh b/.github/scripts/adversarial-review/compose-review-prompt.sh deleted file mode 100755 index 4902a57..0000000 --- a/.github/scripts/adversarial-review/compose-review-prompt.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -delimiter="REVIEW_PROMPT_$(date +%s)_$$" -{ - cat .github/prompts/adversarial-review.md - echo - echo "## Workflow-provided context" - echo - echo "- Repository: $GITHUB_REPOSITORY" - echo "- Event: $GITHUB_EVENT_NAME" - echo "- PR number: ${PR_NUMBER:-none}" - echo "- Base ref: ${BASE_REF:-unknown}" - echo "- Head SHA: ${HEAD_SHA:-unknown}" - echo "- Build exit code: $(cat review-artifacts/build-status.txt 2>/dev/null || echo unknown)" - echo "- Baseline test exit code: $(cat review-artifacts/baseline-test-status.txt 2>/dev/null || echo unknown)" - echo "- Baseline test inventory exit code: $(cat review-artifacts/baseline-test-list-status.txt 2>/dev/null || echo unknown)" - echo - echo "Artifacts are available under ./review-artifacts/. Keep any additional probe artifacts under ./review-artifacts/agent-probes/." 
- if [ -f review-artifacts/baseline-test-inventory.md ]; then - echo - echo "## Automated tests already run/enumerated" - cat review-artifacts/baseline-test-inventory.md - fi -} > review-artifacts/review-prompt.md -{ - echo "prompt<<$delimiter" - cat review-artifacts/review-prompt.md - echo "$delimiter" -} >> "$GITHUB_OUTPUT" diff --git a/.github/scripts/adversarial-review/enforce-recommendation.sh b/.github/scripts/adversarial-review/enforce-recommendation.sh deleted file mode 100755 index a4698d6..0000000 --- a/.github/scripts/adversarial-review/enforce-recommendation.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -python3 - <<'PY' -import importlib.util -import sys -from pathlib import Path - -module_path = Path('.github/scripts/render-adversarial-review-summary.py') -spec = importlib.util.spec_from_file_location('review_summary', module_path) -if spec is None or spec.loader is None: - print(f'::error::Could not load review summary parser from {module_path}') - sys.exit(1) - -review_summary = importlib.util.module_from_spec(spec) -spec.loader.exec_module(review_summary) - -response_path = Path('review-artifacts/agent-response.md') -response = response_path.read_text(encoding='utf-8', errors='replace') if response_path.exists() else '' -review, warning = review_summary.extract_json_blob(response) -if review is None: - print(f'::error::Adversarial review did not produce a valid structured recommendation: {warning}') - sys.exit(1) - -validation_errors = review_summary.validate_review(review) -if validation_errors: - for error in validation_errors: - print(f'::error::Invalid adversarial review result: {error}') - sys.exit(1) - -recommendation = str(review.get('recommendation', '')).strip() -print(f'Adversarial review recommendation: {recommendation}') -if recommendation != 'PASS': - print(f'::error::Adversarial review recommendation is {recommendation}; failing the check so the PR is not mergeable as-is.') - sys.exit(1) -PY diff --git 
a/.github/scripts/adversarial-review/install-os-dependencies.sh b/.github/scripts/adversarial-review/install-os-dependencies.sh deleted file mode 100755 index deeaf9e..0000000 --- a/.github/scripts/adversarial-review/install-os-dependencies.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -set -euxo pipefail - -sudo apt-get update -sudo apt-get install -y libncurses-dev diff --git a/.github/scripts/adversarial-review/persist-agent-response.sh b/.github/scripts/adversarial-review/persist-agent-response.sh deleted file mode 100755 index 74dd1ba..0000000 --- a/.github/scripts/adversarial-review/persist-agent-response.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -mkdir -p review-artifacts -python3 - <<'PY' -import json -import os -from pathlib import Path - -use_retry = os.environ.get('INITIAL_VALID') != 'true' -response = os.environ.get('RETRY_RESPONSE' if use_retry else 'INITIAL_RESPONSE', '') -success = os.environ.get('RETRY_SUCCESS' if use_retry else 'INITIAL_SUCCESS', '') -share_url = os.environ.get('RETRY_SHARE_URL' if use_retry else 'INITIAL_SHARE_URL', '') -Path('review-artifacts/agent-response.md').write_text(response, encoding='utf-8') -if use_retry: - Path('review-artifacts/agent-response-retry.md').write_text(os.environ.get('RETRY_RESPONSE', ''), encoding='utf-8') -Path('review-artifacts/agent-action-metadata.json').write_text( - json.dumps({ - 'selected_attempt': 'retry' if use_retry else 'initial', - 'initial_valid': os.environ.get('INITIAL_VALID', ''), - 'success': success, - 'share_url': share_url, - }, indent=2) + '\n', - encoding='utf-8', -) -PY diff --git a/.github/scripts/adversarial-review/post-sticky-pr-comment.sh b/.github/scripts/adversarial-review/post-sticky-pr-comment.sh deleted file mode 100755 index 536909b..0000000 --- a/.github/scripts/adversarial-review/post-sticky-pr-comment.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -post_comment_input=$(jq -r '.inputs.post_comment // 
"false"' "$GITHUB_EVENT_PATH") -if [ "${ADVERSARIAL_REVIEW_POST_COMMENTS:-}" != "true" ] && [ "$post_comment_input" != "true" ]; then - echo "Sticky PR comment disabled; set ADVERSARIAL_REVIEW_POST_COMMENTS=true or workflow_dispatch post_comment=true to enable." - exit 0 -fi - -marker='' -body_file=$(mktemp) -{ - echo "$marker" - echo "" - echo - cat review-artifacts/adversarial-review-summary.md -} > "$body_file" - -comment_id=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --paginate \ - --jq ".[] | select(.body | contains(\"$marker\")) | .id" | tail -n 1) - -if [ -n "$comment_id" ]; then - gh api -X PATCH "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" -F "body=@$body_file" >/dev/null -else - gh pr comment "$PR_NUMBER" --body-file "$body_file" -fi diff --git a/.github/scripts/adversarial-review/render-review-summary.sh b/.github/scripts/adversarial-review/render-review-summary.sh deleted file mode 100755 index 4151a10..0000000 --- a/.github/scripts/adversarial-review/render-review-summary.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euxo pipefail - -python3 .github/scripts/render-adversarial-review-summary.py \ - --response review-artifacts/agent-response.md \ - --build-log review-artifacts/build.log \ - --baseline-log review-artifacts/baseline-tests.log \ - --build-status review-artifacts/build-status.txt \ - --baseline-status review-artifacts/baseline-test-status.txt \ - --output review-artifacts/adversarial-review-summary.md -cat review-artifacts/adversarial-review-summary.md >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/scripts/adversarial-review/run-baseline-tests.sh b/.github/scripts/adversarial-review/run-baseline-tests.sh deleted file mode 100755 index d5b6eef..0000000 --- a/.github/scripts/adversarial-review/run-baseline-tests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -set +e - -mkdir -p review-artifacts -cargo test --verbose -- --test-threads=1 2>&1 | tee review-artifacts/baseline-tests.log 
-status=${PIPESTATUS[0]} -echo "$status" > review-artifacts/baseline-test-status.txt -exit "$status" diff --git a/.github/scripts/adversarial-review/validate-initial-agent-response.sh b/.github/scripts/adversarial-review/validate-initial-agent-response.sh deleted file mode 100755 index d9561da..0000000 --- a/.github/scripts/adversarial-review/validate-initial-agent-response.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -mkdir -p review-artifacts -python3 - <<'PY' >> "$GITHUB_OUTPUT" -import importlib.util -import os -from pathlib import Path - -Path('review-artifacts/agent-response-initial.md').write_text(os.environ.get('AGENT_RESPONSE', ''), encoding='utf-8') -spec = importlib.util.spec_from_file_location('review_summary', '.github/scripts/render-adversarial-review-summary.py') -review_summary = importlib.util.module_from_spec(spec) -spec.loader.exec_module(review_summary) -review, warning = review_summary.extract_json_blob(os.environ.get('AGENT_RESPONSE', '')) -errors = [] -if review is None: - errors.append(warning or 'missing structured review result') -else: - errors.extend(review_summary.validate_review(review)) -if errors: - print('valid=false') - print(f"reason={' ; '.join(errors)}") -else: - print('valid=true') - print('reason=valid structured review result') -PY diff --git a/.github/scripts/render-adversarial-review-summary.py b/.github/scripts/render-adversarial-review-summary.py deleted file mode 100644 index 2dfb254..0000000 --- a/.github/scripts/render-adversarial-review-summary.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python3 -"""Render a GitHub Actions summary from agent adversarial review output.""" - -from __future__ import annotations - -import argparse -import html -import json -import re -from pathlib import Path -from typing import Any - - -def read_text(path: Path | None) -> str: - if path is None or not path.exists(): - return "" - return path.read_text(encoding="utf-8", errors="replace") - - -def 
extract_json_blob(response: str) -> tuple[dict[str, Any] | None, str | None]: - marker_match = re.search( - r"^\s*JSON_RESULT_START\s*$\s*(.*?)^\s*JSON_RESULT_END\s*$", - response, - flags=re.IGNORECASE | re.DOTALL | re.MULTILINE, - ) - if not marker_match: - return None, "Could not find JSON_RESULT_START / JSON_RESULT_END markers." - - candidate = marker_match.group(1).strip() - fence_match = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", candidate, flags=re.IGNORECASE | re.DOTALL) - if fence_match: - candidate = fence_match.group(1).strip() - - try: - parsed = json.loads(candidate) - except json.JSONDecodeError as error: - return None, f"Could not parse JSON review block between JSON_RESULT_START and JSON_RESULT_END: {error}" - - if not isinstance(parsed, dict): - return None, "Structured review JSON must be an object." - return parsed, None - - -def validate_review(review: dict[str, Any]) -> list[str]: - errors: list[str] = [] - recommendation = review.get("recommendation") - if recommendation not in {"PASS", "FAIL", "INVESTIGATE"}: - errors.append("recommendation must be PASS, FAIL, or INVESTIGATE") - - for field in ("why", "finalMessage"): - if not isinstance(review.get(field), str) or not review[field].strip(): - errors.append(f"{field} must be a non-empty string") - - tests = review.get("tests") - if not isinstance(tests, list) or not tests: - errors.append("tests must be a non-empty array") - return errors - - required_test_fields = ("title", "hypothesis", "impact", "command", "output", "result", "unitTestRecommendation") - for index, test in enumerate(tests, start=1): - if not isinstance(test, dict): - errors.append(f"tests[{index}] must be an object") - continue - for field in required_test_fields: - if not isinstance(test.get(field), str) or not test[field].strip(): - errors.append(f"tests[{index}].{field} must be a non-empty string") - if test.get("result") not in {"PASS", "FAIL"}: - errors.append(f"tests[{index}].result must be PASS or FAIL") - - return 
errors - - -def normalize_tests(value: Any) -> list[dict[str, Any]]: - if not isinstance(value, list): - return [] - return [item for item in value if isinstance(item, dict)] - - -def markdown_text(value: Any) -> str: - return html.escape(str(value), quote=False) - - -def fenced_text(value: Any) -> str: - return html.escape(str(value), quote=False).replace("```", "`\\`\\`") - - -def append_log_tail(lines: list[str], title: str, text: str, max_lines: int = 60) -> None: - if not text.strip(): - return - tail = "\n".join(text.rstrip().splitlines()[-max_lines:]) - lines.extend([ - f"### {title}", - "", - "```text", - tail, - "```", - "", - ]) - - -def render_summary(args: argparse.Namespace) -> str: - response = read_text(args.response) - build_log = read_text(args.build_log) - baseline_log = read_text(args.baseline_log) - build_status = read_text(args.build_status).strip() if args.build_status and args.build_status.exists() else "unknown" - baseline_status = read_text(args.baseline_status).strip() if args.baseline_status and args.baseline_status.exists() else "unknown" - review, warning = extract_json_blob(response) - - lines: list[str] = [ - "## Adversarial review", - "", - f"- **Build exit code:** `{build_status or 'unknown'}`", - f"- **Baseline test exit code:** `{baseline_status or 'unknown'}`", - ] - - if review is None: - lines.extend([ - "- **Recommendation:** `UNKNOWN`", - "", - f"> ⚠️ {warning}", - "", - ]) - else: - recommendation = str(review.get("recommendation", "UNKNOWN")) - why = str(review.get("why", "No rationale supplied.")) - final_message = str(review.get("finalMessage", "")) - tests = normalize_tests(review.get("tests")) - validation_errors = validate_review(review) - - lines.extend([ - f"- **Recommendation:** `{markdown_text(recommendation)}`", - f"- **Why:** {markdown_text(why)}", - ]) - if final_message: - lines.append(f"- **Final message:** {markdown_text(final_message)}") - if validation_errors: - lines.extend(["", "### Structured review 
validation errors", ""]) - lines.extend(f"- {error}" for error in validation_errors) - lines.extend(["", "### Structured probes", ""]) - - if not tests: - lines.extend(["No structured probes were parsed from the agent response.", ""]) - else: - for index, test in enumerate(tests, start=1): - title = str(test.get("title", f"Probe {index}")) - result = str(test.get("result", "UNKNOWN")) - lines.extend([ - f"#### {index}. {markdown_text(title)} — `{markdown_text(result)}`", - "", - f"- **Hypothesis:** {markdown_text(test.get('hypothesis', ''))}", - f"- **Impact:** {markdown_text(test.get('impact', ''))}", - "- **Command:**", - "", - "```text", - fenced_text(test.get('command', '')), - "```", - "", - f"- **Output:** {markdown_text(test.get('output', ''))}", - f"- **Coverage recommendation:** {markdown_text(test.get('unitTestRecommendation', ''))}", - "", - ]) - - lines.extend([ - "### Full agent response", - "", - "
", - "Expand raw response", - "", - "````text", - fenced_text(response[-12000:]) if response else "(no agent response captured)", - "````", - "", - "
", - "", - ]) - - append_log_tail(lines, "Build log tail", build_log) - append_log_tail(lines, "Baseline test log tail", baseline_log) - - return "\n".join(lines) - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument("--response", type=Path, required=True) - parser.add_argument("--build-log", type=Path) - parser.add_argument("--baseline-log", type=Path) - parser.add_argument("--build-status", type=Path) - parser.add_argument("--baseline-status", type=Path) - parser.add_argument("--output", type=Path, required=True) - args = parser.parse_args() - - args.output.parent.mkdir(parents=True, exist_ok=True) - args.output.write_text(render_summary(args), encoding="utf-8") - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/adversarial-review.yml b/.github/workflows/adversarial-review.yml index a4c647f..533f068 100644 --- a/.github/workflows/adversarial-review.yml +++ b/.github/workflows/adversarial-review.yml @@ -1,4 +1,4 @@ -name: Adversarial review +name: Enemy review on: workflow_dispatch: @@ -27,18 +27,12 @@ permissions: actions: read concurrency: - group: adversarial-review-${{ github.workflow }}-${{ github.event.pull_request.number || github.event.inputs.pr_number || github.event.inputs.ref || github.sha }} + group: enemy-review-${{ github.workflow }}-${{ github.event.pull_request.number || github.event.inputs.pr_number || github.event.inputs.ref || github.sha }} cancel-in-progress: false -env: - CARGO_TERM_COLOR: always - PR_NUMBER: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }} - BASE_REF: ${{ github.event.pull_request.base.ref || 'main' }} - HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }} - jobs: - adversarial-review: - name: Adversarial review + enemy: + name: Enemy review # pull_request runs only for same-repository branches so repository/model secrets are not exposed to forked PR code. 
if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }} runs-on: ubuntu-latest @@ -57,153 +51,30 @@ jobs: env: GH_TOKEN: ${{ github.token }} REQUESTED_PR: ${{ github.event.inputs.pr_number }} - run: bash .github/scripts/adversarial-review/checkout-workflow-dispatch-pr.sh - - - name: Install OS dependencies - run: bash .github/scripts/adversarial-review/install-os-dependencies.sh - - - name: Install Rust - uses: dtolnay/rust-toolchain@v1 - with: - toolchain: stable - - - name: Setup Node.js for agent action - uses: actions/setup-node@v6 - with: - node-version: '25' - - - name: Build baseline binary - id: build - continue-on-error: true - run: bash .github/scripts/adversarial-review/build-baseline-binary.sh - - - name: Run baseline tests - id: baseline_tests - continue-on-error: true - run: bash .github/scripts/adversarial-review/run-baseline-tests.sh - - - name: Collect automated test inventory - if: always() - run: bash .github/scripts/adversarial-review/collect-test-inventory.sh - - - name: Collect review context - env: - GH_TOKEN: ${{ github.token }} - run: bash .github/scripts/adversarial-review/collect-review-context.sh - - - name: Compose review prompt - id: compose_prompt - run: bash .github/scripts/adversarial-review/compose-review-prompt.sh - - - name: Run adversarial review - id: review_agent - continue-on-error: true - uses: cv/pi-action@main - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - output_mode: output - allowed_associations: OWNER,MEMBER,COLLABORATOR - prompt: ${{ steps.compose_prompt.outputs.prompt }} - pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }} - timeout: '1800' - share_session: true - provider: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER }} - model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }} - api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }} - provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }} - provider_api: ${{ 
vars.ADVERSARIAL_REVIEW_PROVIDER_API }} - model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }} - model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }} - model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }} - model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }} - model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }} - env: - NPM_CONFIG_IGNORE_SCRIPTS: 'true' - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} - MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} - OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} - NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} - - - name: Validate initial agent response - id: validate_initial_response - if: always() - env: - AGENT_RESPONSE: ${{ steps.review_agent.outputs.response }} - run: bash .github/scripts/adversarial-review/validate-initial-agent-response.sh + run: | + set -euxo pipefail + gh pr checkout "$REQUESTED_PR" + git submodule update --init --recursive - - name: Compose retry review prompt - id: compose_retry_prompt - if: ${{ always() && steps.validate_initial_response.outputs.valid != 'true' }} - env: - RETRY_REASON: ${{ steps.validate_initial_response.outputs.reason }} - run: bash .github/scripts/adversarial-review/compose-retry-review-prompt.sh - - - name: Retry adversarial review if structured result is missing - id: review_agent_retry - if: ${{ always() && steps.validate_initial_response.outputs.valid != 'true' }} - continue-on-error: true - uses: cv/pi-action@main + - name: Run enemy + uses: cv/enemy@main with: github_token: ${{ secrets.GITHUB_TOKEN }} - output_mode: output - allowed_associations: OWNER,MEMBER,COLLABORATOR - prompt: ${{ steps.compose_retry_prompt.outputs.prompt }} + prompt_file: .github/prompts/enemy.md pr_number: ${{ github.event.pull_request.number || github.event.inputs.pr_number || '' }} - timeout: '1800' - share_session: true - provider: ${{ 
vars.ADVERSARIAL_REVIEW_PROVIDER }} - model: ${{ vars.ADVERSARIAL_REVIEW_MODEL }} - api_key: ${{ secrets.ADVERSARIAL_REVIEW_API_KEY }} - provider_base_url: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_BASE_URL }} - provider_api: ${{ vars.ADVERSARIAL_REVIEW_PROVIDER_API }} - model_name: ${{ vars.ADVERSARIAL_REVIEW_MODEL_NAME }} - model_reasoning: ${{ vars.ADVERSARIAL_REVIEW_MODEL_REASONING }} - model_input: ${{ vars.ADVERSARIAL_REVIEW_MODEL_INPUT }} - model_context_window: ${{ vars.ADVERSARIAL_REVIEW_MODEL_CONTEXT_WINDOW }} - model_max_tokens: ${{ vars.ADVERSARIAL_REVIEW_MODEL_MAX_TOKENS }} - env: - NPM_CONFIG_IGNORE_SCRIPTS: 'true' - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} - MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} - OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} - NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} - - - name: Persist agent response - if: always() - env: - INITIAL_VALID: ${{ steps.validate_initial_response.outputs.valid }} - INITIAL_RESPONSE: ${{ steps.review_agent.outputs.response }} - INITIAL_SUCCESS: ${{ steps.review_agent.outputs.success }} - INITIAL_SHARE_URL: ${{ steps.review_agent.outputs.share_url }} - RETRY_RESPONSE: ${{ steps.review_agent_retry.outputs.response }} - RETRY_SUCCESS: ${{ steps.review_agent_retry.outputs.success }} - RETRY_SHARE_URL: ${{ steps.review_agent_retry.outputs.share_url }} - run: bash .github/scripts/adversarial-review/persist-agent-response.sh - - - name: Render review summary - if: always() - run: bash .github/scripts/adversarial-review/render-review-summary.sh - - - name: Upload review artifacts - if: always() - uses: actions/upload-artifact@v7 - with: - name: adversarial-review-${{ github.run_id }}-${{ github.run_attempt }} - path: review-artifacts/** - if-no-files-found: warn - retention-days: 14 - - - name: Post or update sticky PR comment - if: ${{ always() && env.PR_NUMBER != '' }} - env: - GH_TOKEN: 
${{ github.token }} - ADVERSARIAL_REVIEW_POST_COMMENTS: ${{ vars.ADVERSARIAL_REVIEW_POST_COMMENTS }} - run: bash .github/scripts/adversarial-review/post-sticky-pr-comment.sh - - - name: Enforce adversarial review recommendation - if: always() - run: bash .github/scripts/adversarial-review/enforce-recommendation.sh + baseline_command: | + sudo apt-get update && sudo apt-get install -y libncurses-dev && cargo build --verbose + cargo test --verbose -- --test-threads=1 + test_inventory_command: cargo test -- --list + post_comment: ${{ vars.ENEMY_POST_COMMENTS == 'true' || (github.event_name == 'workflow_dispatch' && github.event.inputs.post_comment == 'true') }} + comment_marker: '<!-- bash-ast-enemy-review -->' # must be non-empty: an empty marker matches every PR comment + provider: ${{ vars.ENEMY_PROVIDER }} + model: ${{ vars.ENEMY_MODEL }} + api_key: ${{ secrets.ENEMY_API_KEY }} + provider_base_url: ${{ vars.ENEMY_PROVIDER_BASE_URL }} + provider_api: ${{ vars.ENEMY_PROVIDER_API }} + model_name: ${{ vars.ENEMY_MODEL_NAME }} + model_reasoning: ${{ vars.ENEMY_MODEL_REASONING }} + model_input: ${{ vars.ENEMY_MODEL_INPUT }} + model_context_window: ${{ vars.ENEMY_MODEL_CONTEXT_WINDOW }} + model_max_tokens: ${{ vars.ENEMY_MODEL_MAX_TOKENS }}