From 8ea8e428deb79d998d56f0e5b93fe1b5eab3a545 Mon Sep 17 00:00:00 2001 From: aaight Date: Mon, 18 May 2026 20:59:52 +0200 Subject: [PATCH 1/4] fix: align Cascade agent runtime images with common review tools (#1380) * fix: align Cascade agent runtime images with common review tools * fix: address feedback --------- Co-authored-by: Cascade Bot --- .github/workflows/ci.yml | 10 ++ .github/workflows/deploy-dev.yml | 11 ++- .github/workflows/deploy.yml | 13 ++- CHANGELOG.md | 4 + Dockerfile.worker | 40 +++++++- README.md | 2 +- docs/architecture/05-engine-backends.md | 19 ++++ docs/getting-started.md | 2 +- src/backends/shared/envFilter.ts | 10 ++ src/backends/shared/nativeToolPrompts.ts | 9 ++ tests/docker/worker-runtime-tools/run-test.sh | 95 +++++++++++++++++++ tests/unit/backends/shared-envBuilder.test.ts | 46 +++++++++ tests/unit/backends/shared-envFilter.test.ts | 28 ++++++ .../backends/shared-nativeToolPrompts.test.ts | 55 +++++++++++ 14 files changed, 339 insertions(+), 5 deletions(-) create mode 100755 tests/docker/worker-runtime-tools/run-test.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a636b151c..4248a3d5b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,6 +136,16 @@ jobs: - name: Build worker image run: docker build -f Dockerfile.worker -t cascade-worker:ci-check . + # MNG-1055: the worker image must include the baseline native-session + # runtime (Python shim + Playwright Chromium cache + the env var the + # native-tool env filter forwards to agent shells). The smoke script + # below fails loudly if any of those are missing — keeps the worker + # image aligned with the contract documented in + # docs/architecture/05-engine-backends.md and surfaced to agents in + # the native-tool prompts. + - name: Smoke-test worker runtime tools + run: WORKER_IMAGE=cascade-worker:ci-check tests/docker/worker-runtime-tools/run-test.sh + enforce-dev-to-main: runs-on: ubuntu-latest if: github.event_name == 'pull_request' && github.base_ref == 'main' diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index 61b01f571..366b39493 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -39,13 +39,22 @@ jobs: docker run --rm ${{ env.ROUTER_IMAGE }}:dev-${{ github.sha }} \ node --check dist/router/index.js - - name: Build and push worker image + - name: Build worker image run: | docker build \ --label org.opencontainers.image.revision=${{ github.sha }} \ -f Dockerfile.worker \ -t ${{ env.WORKER_IMAGE }}:dev \ -t ${{ env.WORKER_IMAGE }}:dev-${{ github.sha }} . + + # MNG-1055: gate the dev worker push on the same runtime smoke test + # CI runs so dev never publishes an image lacking python, Playwright + # Chromium, or a working PLAYWRIGHT_BROWSERS_PATH. + - name: Smoke-test worker runtime tools + run: WORKER_IMAGE=${{ env.WORKER_IMAGE }}:dev-${{ github.sha }} tests/docker/worker-runtime-tools/run-test.sh + + - name: Push worker image + run: | docker push ${{ env.WORKER_IMAGE }}:dev docker push ${{ env.WORKER_IMAGE }}:dev-${{ github.sha }} diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 28c4a0726..b97d88728 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -39,7 +39,7 @@ jobs: docker run --rm ${{ env.ROUTER_IMAGE }}:${{ github.sha }} \ node --check dist/router/index.js - - name: Build and push worker image + - name: Build worker image run: | docker build \ --label org.opencontainers.image.revision=${{ github.sha }} \ @@ -47,6 +47,17 @@ jobs: -f Dockerfile.worker \ -t ${{ env.WORKER_IMAGE }}:latest \ -t ${{ env.WORKER_IMAGE }}:${{ github.sha }} . + + # MNG-1055: gate the production worker push on the same runtime smoke + # test CI runs. A worker image without `python`, Playwright Chromium, + # or a working `PLAYWRIGHT_BROWSERS_PATH` would break agents the + # moment it reached production, so we fail before publishing the + # `:latest` / SHA tags. + - name: Smoke-test worker runtime tools + run: WORKER_IMAGE=${{ env.WORKER_IMAGE }}:${{ github.sha }} tests/docker/worker-runtime-tools/run-test.sh + + - name: Push worker image + run: | docker push ${{ env.WORKER_IMAGE }}:latest docker push ${{ env.WORKER_IMAGE }}:${{ github.sha }} diff --git a/CHANGELOG.md b/CHANGELOG.md index f55b4c166..00df6bd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable user-visible changes to CASCADE are documented here. The format is l ## Unreleased +### Changed + +- **Worker image now ships a Python shim and a shared Playwright Chromium cache as native-session baseline tools** ([MNG-1055](https://linear.app/issue/MNG-1055)). `Dockerfile.worker` installs `python3` + `python-is-python3` so both `python` and `python3` resolve to the same Debian-owned interpreter (closing the friction cluster MNG-887/897/926/934/947/957/973/1010/1024/1033/1039/1044). It also installs a pinned `@playwright/test` plus Chromium with browser dependencies into `/ms-playwright` (closing MNG-998/1048), then makes that cache owned by the runtime `node` user so project `.cascade/setup.sh` scripts pinned to a different Playwright revision can install the missing Chromium revision into the inherited `$PLAYWRIGHT_BROWSERS_PATH`. The native-tool env filter (`src/backends/shared/envFilter.ts`) now allowlists `PLAYWRIGHT_BROWSERS_PATH` as an exact match so the cache is reachable from every native-tool engine subprocess; the broader `PLAYWRIGHT_*` prefix is intentionally left out to preserve the defense-in-depth env-allowlist posture. A new Docker smoke script (`tests/docker/worker-runtime-tools/run-test.sh`) validates `python`, `python3`, `python -c 'import json'`, the Playwright Chromium launch path, the env var, and node-user write access to the cache, and is wired into CI (`docker-build-check`) and both deploy workflows (`deploy.yml` / `deploy-dev.yml`) **before** the worker image is pushed, so a broken baseline cannot reach `:latest` / `:dev` tags. The native-tool system prompt (`src/backends/shared/nativeToolPrompts.ts`) exposes the guaranteed tools to agents under a new "Guaranteed runtime tools" section, and the engine-backends architecture doc, README, and Getting Started prerequisites describe the runtime baseline (including the image-size implication). + ### Documentation - **Friction reporting is now documented for operators and provider contributors.** Architecture docs cover the optional PM Friction slot (`lists.friction` for Trello, `statuses.friction` for JIRA/Linear), `ReportFriction`, and `cascade-tools pm report-friction --details-file -`. The integration guide explains that friction reports use existing provider `createWorkItem` plus optional `moveWorkItem`, so providers do not need a new adapter method or a DB-backed friction index. Resilience docs describe the JSONL sidecar/outbox retry path, missing-slot behavior, and non-blocking drain failures. See Trello card [Rvv7VVd5](https://trello.com/c/69ff6af3bc5c526cc5faa2d4). diff --git a/Dockerfile.worker b/Dockerfile.worker index d2db9c803..f7abb2772 100644 --- a/Dockerfile.worker +++ b/Dockerfile.worker @@ -24,6 +24,13 @@ LABEL cascade.managed=true RUN npm install -g pnpm --force # Install system packages needed by agent runtime +# +# `python3` plus `python-is-python3` provide a Debian-owned Python shim so +# both `python` and `python3` work predictably for `python -c 'import json'` +# and other lightweight scripting agents reach for. The package ownership is +# load-bearing — manual `ln -s python3 python` symlinks drift on base-image +# changes; the apt package survives those cleanly. See MNG-1055 (and the +# friction cluster MNG-887/897/926/934/947/957/973/1010/1024/1033/1039/1044). RUN apt-get update && apt-get install -y \ ca-certificates \ curl \ @@ -36,6 +43,8 @@ RUN apt-get update && apt-get install -y \ postgresql-client \ procps \ psutils \ + python3 \ + python-is-python3 \ redis-tools \ ripgrep \ ruby \ @@ -43,7 +52,10 @@ RUN apt-get update && apt-get install -y \ tmux \ unzip \ && rm -rf /var/lib/apt/lists/* \ - && ln -s $(which fdfind) /usr/local/bin/fd + && ln -s $(which fdfind) /usr/local/bin/fd \ + && python3 --version \ + && python --version \ + && python -c 'import json; print(json.dumps({"ok": True}))' # Configure tmux to keep panes alive after command exits # This allows capturing output and exit code from fast-exiting commands @@ -65,6 +77,32 @@ RUN npm install -g \ @openai/codex@0.125.0 \ opencode-ai@1.14.25 +# Playwright browser cache. +# +# Workers regularly review PRs that include UI/Playwright tests, and review +# agents call `playwright test` / `playwright launch chromium` in shell +# sessions. Without a baseline browser cache they hit +# `browserType.launch: Executable doesn't exist` and waste budget trying to +# install Chromium themselves (often into `~/.cache/ms-playwright` for the +# wrong user, or while offline). See friction cluster MNG-998 / MNG-1048. +# +# The cache lives at a stable non-home path so: +# - Chromium binaries are owned by the runtime `node` user and readable by all +# users. +# - Project setup scripts that need a different Playwright revision can write +# the missing browser revision into the same cache instead of failing on a +# root-owned directory. +# - The `USER node` switch below does not invalidate or duplicate the cache. +# - `PLAYWRIGHT_BROWSERS_PATH` can be allowlisted by the native-tool env +# filter (`src/backends/shared/envFilter.ts`) and forwarded to agent +# subprocesses without leaking any other Playwright config. +ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright +RUN npm install -g @playwright/test@1.49.1 \ + && PLAYWRIGHT_BROWSERS_PATH=/ms-playwright \ + npx --yes playwright@1.49.1 install --with-deps chromium \ + && chown -R node:node /ms-playwright \ + && chmod -R u+rwX,go+rX /ms-playwright + # Switch to non-root user for running workers. # Claude Code CLI refuses --dangerously-skip-permissions when running as root. # The node user (uid 1000) is pre-created by the Node.js base image and matches diff --git a/README.md b/README.md index 4d7e175da..72c559e81 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ The included `docker-compose.yml` runs all services with a single command. Worke |-------|-----------|---------| | Dashboard + Frontend | `Dockerfile.selfhosted` | API server + web UI (combined) | | Router | `Dockerfile.router` | Webhook receiver, worker orchestration | -| Worker | `Dockerfile.worker` | Full agent runtime (clones repos, runs AI) | +| Worker | `Dockerfile.worker` | Full agent runtime (clones repos, runs AI). Ships a baseline native-session toolchain (`python`/`python3`, `jq`, `rg`, `fd`, `git`, `tmux`, `cascade-tools`) and a shared Playwright Chromium cache at `$PLAYWRIGHT_BROWSERS_PATH=/ms-playwright`. See [engine-backends](./docs/architecture/05-engine-backends.md#worker-image-runtime-baseline-mng-1055). | **Required production environment variables:** diff --git a/docs/architecture/05-engine-backends.md b/docs/architecture/05-engine-backends.md index fe638fe82..f4ec06378 100644 --- a/docs/architecture/05-engine-backends.md +++ b/docs/architecture/05-engine-backends.md @@ -151,4 +151,23 @@ All LLM requests and responses are logged to the `agent_run_llm_calls` table, tr - Duration - Tool calls made +## Worker-image runtime baseline (MNG-1055) + +Native-tool engines do not provision their own shell environment — they execute against whatever the worker image ships. CASCADE intentionally bakes a fixed baseline into `Dockerfile.worker` so agents can rely on these tools without per-project `.cascade/setup.sh` workarounds: + +| Tool | Where it lives | Notes | +|---|---|---| +| `python` / `python3` | apt `python3` + `python-is-python3` | Both names resolve to the same Debian-owned Python 3. Use either for `python -c 'import json'` etc.; do not `pip install` at runtime. | +| `jq`, `rg`, `fd`, `git`, `tmux`, `cascade-tools`, `ast-grep` (`sg`) | apt + curl + npm install in the worker image | Prefer these over hand-rolled equivalents in shell commands. | +| Playwright Chromium | `npm install -g @playwright/test@ && playwright install --with-deps chromium` | Browser cache lives at `$PLAYWRIGHT_BROWSERS_PATH` (`/ms-playwright`), readable and writable by the unprivileged `node` user. | +| Agent engine CLIs | `@anthropic-ai/claude-code`, `@openai/codex`, `opencode-ai` | All pinned versions. | + +**Env propagation.** Native-tool engines sanitize subprocess env via `src/backends/shared/envFilter.ts`. `PLAYWRIGHT_BROWSERS_PATH` is allowlisted as an exact match so the bake-in cache is reachable from agent shells; the broader `PLAYWRIGHT_*` prefix is intentionally not allowed, preserving the defense-in-depth posture for the rest of Playwright's env surface. + +**Smoke coverage.** Every build path validates the baseline via `tests/docker/worker-runtime-tools/run-test.sh`. CI (`.github/workflows/ci.yml` → `docker-build-check`) runs the script against `cascade-worker:ci-check` after `docker build`. The deploy workflows (`.github/workflows/deploy{,-dev}.yml`) run the same script against the freshly built worker image **before** pushing `:latest` / `:dev` / SHA tags, so a regression that would break agents in production blocks the publish step. + +**Image size.** Chromium + system deps significantly increase the worker image. We pin one Chromium revision and one `@playwright/test` version; target repositories that need a materially different revision can run their normal `npx playwright install chromium` flow in `.cascade/setup.sh`, which writes the missing revision into the same `$PLAYWRIGHT_BROWSERS_PATH` cache as the runtime `node` user. Other browsers (Firefox, WebKit) are intentionally not installed. + +**Agent visibility.** The list of guaranteed tools is surfaced to agents in the native-tool system prompt (`src/backends/shared/nativeToolPrompts.ts`, "Guaranteed runtime tools" section). Adding a new tool to the worker image should always be paired with an update to that section so agents reach for the new capability instead of working around its absence. + For further details on adding a new engine, see [`docs/adding-engines.md`](../adding-engines.md). diff --git a/docs/getting-started.md b/docs/getting-started.md index 68da41ef3..a744b6d44 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -7,7 +7,7 @@ This guide walks you through setting up Cascade using Docker Compose — from ze ## Prerequisites - **Docker** and **Docker Compose** (v2+) -- ~6 GB disk space (the worker image includes Claude Code CLI and other agent tools) +- ~8 GB disk space (the worker image ships agent CLIs, a Python shim, and a shared Playwright Chromium cache at `$PLAYWRIGHT_BROWSERS_PATH=/ms-playwright` — see [engine-backends](./architecture/05-engine-backends.md#worker-image-runtime-baseline-mng-1055)) - A GitHub repository you want Cascade to work on - At least one LLM API key (OpenRouter, Anthropic, or OpenAI) or a Claude Max subscription diff --git a/src/backends/shared/envFilter.ts b/src/backends/shared/envFilter.ts index 0505bbbea..676c582ea 100644 --- a/src/backends/shared/envFilter.ts +++ b/src/backends/shared/envFilter.ts @@ -86,6 +86,16 @@ export const SHARED_ALLOWED_ENV_EXACT = new Set([ 'NO_COLOR', 'TERM_PROGRAM', 'COLORTERM', + + // Worker-image runtime baseline (MNG-1055). The Docker image bakes a + // shared Playwright Chromium cache at /ms-playwright and exports + // `PLAYWRIGHT_BROWSERS_PATH=/ms-playwright`. Native-tool engines sanitize + // subprocess env via this allowlist before spawning shell commands, so + // without an exact-match entry the var is dropped and `playwright launch + // chromium` re-downloads (or fails when offline). Only the exact name is + // allowlisted — broad `PLAYWRIGHT_*` prefix matching would weaken the + // defense-in-depth posture for the rest of Playwright's env surface. + 'PLAYWRIGHT_BROWSERS_PATH', ]); /** Prefix patterns — any var starting with one of these passes through. */ diff --git a/src/backends/shared/nativeToolPrompts.ts b/src/backends/shared/nativeToolPrompts.ts index d9d9bd210..163db6430 100644 --- a/src/backends/shared/nativeToolPrompts.ts +++ b/src/backends/shared/nativeToolPrompts.ts @@ -15,6 +15,15 @@ You are operating in a native-tool environment, not a gadget/function-call envir - If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead. - Trello, JIRA, and GitHub attachment URLs require backend authentication. NEVER curl, wget, or HTTP-fetch them — they return an authorization error. Work item images are pre-fetched and available either as images in your conversation context or as files under \`.cascade/context/images/\` — use whichever is present; never fetch the original URLs. +## Guaranteed runtime tools + +The worker image bakes the following baseline tools so you can rely on them in any shell command without installing anything: + +- \`python\` and \`python3\` — interchangeable; both resolve to the same Debian-owned Python 3. Use either for ad-hoc JSON shaping (\`python -c 'import json; ...'\`), small parsing scripts, or library smoke checks. Do NOT \`pip install\` packages at runtime — the image's stdlib is the contract. +- \`jq\`, \`rg\` (ripgrep), \`fd\`, \`git\`, \`tmux\` — preferred for JSON queries, content/file search, version-control, and persistent shell sessions respectively. +- \`cascade-tools\` — the CASCADE CLI documented in the "CASCADE Tools" section below. Use it (not \`gh\` / raw curl) for PM, SCM, and session operations. +- Playwright Chromium — installed at \`$PLAYWRIGHT_BROWSERS_PATH\` (\`/ms-playwright\`). \`@playwright/test\` is available globally; \`NODE_PATH=$(npm root -g) node -e 'require("@playwright/test")...'\` or \`npx playwright test\` from a repo with a local pin can use the cache, and project setup can install a missing pinned revision into that writable path. + ## Termination protocol When you have completed all required side-effects for this task — per the hooks declared on this agent (e.g. commits pushed, PR opened, review submitted, checklist created, PM comment posted) — call the \`Finish\` tool with a one-paragraph summary of what you did. diff --git a/tests/docker/worker-runtime-tools/run-test.sh b/tests/docker/worker-runtime-tools/run-test.sh new file mode 100755 index 000000000..a58006ada --- /dev/null +++ b/tests/docker/worker-runtime-tools/run-test.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# Smoke-test the worker image's baseline native-session runtime tools. +# +# This is the single source of truth for "what does CASCADE guarantee is +# already installed in a worker shell" — CI (Dockerfile.worker build job) and +# the deploy pipelines (`.github/workflows/deploy{,-dev}.yml`) all invoke +# this same script. Failure here is intentionally noisy so that a missing +# Python shim, a missing Playwright Chromium browser, or a broken +# `PLAYWRIGHT_BROWSERS_PATH` blocks image promotion before the broken image +# reaches production agents. +# +# Usage: +# WORKER_IMAGE=cascade-worker:ci-check tests/docker/worker-runtime-tools/run-test.sh +# +# See MNG-1055 and the friction clusters cited in the comment in +# Dockerfile.worker for the motivating bug pattern. +set -euo pipefail + +WORKER_IMAGE="${WORKER_IMAGE:-cascade-worker:ci-check}" + +echo "=== Worker Runtime Tools Smoke Test ===" +echo "Worker image : $WORKER_IMAGE" +echo "" + +# 1. Python shim — `python` and `python3` both work, and the std-lib import +# path is healthy. Repeated friction reports (see Dockerfile.worker comment +# for the full cluster) traced to agents calling `python -c '...'` and +# hitting `command not found` on bare `python3`-only images. +docker run --rm "$WORKER_IMAGE" bash -lc ' + set -e + echo "--- Python shim check ---" + python3 --version + python --version + python -c "import json; print(json.dumps({\"ok\": True}))" + echo "" +' + +# 2. Playwright Chromium — the cache lives at $PLAYWRIGHT_BROWSERS_PATH and +# is readable/writable by the unprivileged `node` user the worker switches to. +# Write access is deliberate: project `.cascade/setup.sh` scripts inherit this +# env var, and repos pinned to a different Playwright revision must be able to +# install the missing browser revision into the shared cache. The native-tool +# env filter (src/backends/shared/envFilter.ts) allowlists this single exact +# variable; broader `PLAYWRIGHT_*` propagation is intentionally off to keep the +# defense-in-depth posture for the rest of Playwright's env surface. +# +# `NODE_PATH=$(npm root -g)` is the documented way to require globally +# installed packages from a one-off Node invocation — `@playwright/test` +# is installed globally in the worker image, not in any per-agent +# workspace. +docker run --rm "$WORKER_IMAGE" bash -lc ' + set -e + echo "--- Playwright runtime context ---" + echo "node: $(node --version)" + echo "PLAYWRIGHT_BROWSERS_PATH: ${PLAYWRIGHT_BROWSERS_PATH:-}" + if [ -z "${PLAYWRIGHT_BROWSERS_PATH:-}" ]; then + echo "FAIL: PLAYWRIGHT_BROWSERS_PATH is not set in the worker image" + exit 1 + fi + if [ ! -d "$PLAYWRIGHT_BROWSERS_PATH" ]; then + echo "FAIL: PLAYWRIGHT_BROWSERS_PATH ($PLAYWRIGHT_BROWSERS_PATH) does not exist" + exit 1 + fi + if [ ! -w "$PLAYWRIGHT_BROWSERS_PATH" ]; then + echo "FAIL: PLAYWRIGHT_BROWSERS_PATH ($PLAYWRIGHT_BROWSERS_PATH) is not writable by $(id -un)" + exit 1 + fi + mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/.cascade-write-test" + rmdir "$PLAYWRIGHT_BROWSERS_PATH/.cascade-write-test" + NODE_PATH=$(npm root -g) node -e "console.log(\"playwright version: \" + require(\"@playwright/test/package.json\").version)" + echo "" + + echo "--- Playwright Chromium launch check ---" + NODE_PATH=$(npm root -g) node -e " + const { chromium } = require(\"@playwright/test\"); + (async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + await page.setContent(\"

cascade-worker-ok

\"); + const text = await page.textContent(\"#cascade\"); + if (text !== \"cascade-worker-ok\") { + throw new Error(\"Unexpected DOM text: \" + text); + } + await browser.close(); + console.log(\"Playwright Chromium launch OK\"); + })().catch((err) => { + console.error(\"FAIL: Playwright Chromium launch failed\"); + console.error(err && err.stack ? err.stack : err); + process.exit(1); + }); + " +' + +echo "" +echo "=== Worker runtime tools smoke test PASSED ===" diff --git a/tests/unit/backends/shared-envBuilder.test.ts b/tests/unit/backends/shared-envBuilder.test.ts index 41441774d..77930cf26 100644 --- a/tests/unit/backends/shared-envBuilder.test.ts +++ b/tests/unit/backends/shared-envBuilder.test.ts @@ -194,6 +194,52 @@ describe('buildEngineEnv', () => { }); }); + // MNG-1055: the worker image exports PLAYWRIGHT_BROWSERS_PATH=/ms-playwright + // so the prebuilt Chromium cache is reachable. The native-tool env filter + // must forward that exact variable to engine subprocesses; otherwise + // `playwright launch chromium` in agent shell commands fails with + // `Executable doesn't exist` and the cache is wasted. Pinning this in the + // env-builder test (not just the env-filter test) catches future + // regressions where someone tightens the engine-specific allowlist + // without touching the shared one. + describe('PLAYWRIGHT_BROWSERS_PATH propagation (MNG-1055)', () => { + it('forwards PLAYWRIGHT_BROWSERS_PATH from process.env through buildEngineEnv', () => { + process.env.PLAYWRIGHT_BROWSERS_PATH = '/ms-playwright'; + const env = buildEngineEnv({ allowedEnvExact: new Set() }); + expect(env.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + }); + + it('forwards PLAYWRIGHT_BROWSERS_PATH for every engine allowlist shape', () => { + process.env.PLAYWRIGHT_BROWSERS_PATH = '/ms-playwright'; + + const claudeEnv = buildEngineEnv({ + allowedEnvExact: new Set(['CLAUDE_CODE_OAUTH_TOKEN', 'ANTHROPIC_API_KEY']), + }); + expect(claudeEnv.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + + const codexEnv = buildEngineEnv({ + allowedEnvExact: new Set(['OPENAI_API_KEY']), + extraVars: { CI: 'true', CODEX_DISABLE_UPDATE_NOTIFIER: '1' }, + }); + expect(codexEnv.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + + const opencodeEnv = buildEngineEnv({ + allowedEnvExact: new Set(['OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'OPENROUTER_API_KEY']), + extraVars: { CI: 'true' }, + }); + expect(opencodeEnv.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + }); + + it('keeps blocked secret vars filtered even with PLAYWRIGHT_BROWSERS_PATH present', () => { + process.env.PLAYWRIGHT_BROWSERS_PATH = '/ms-playwright'; + const env = buildEngineEnv({ allowedEnvExact: new Set() }); + expect(env.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + expect(env.DATABASE_URL).toBeUndefined(); + expect(env.REDIS_URL).toBeUndefined(); + expect(env.MY_SECRET).toBeUndefined(); + }); + }); + describe('security invariants', () => { it('always blocks all SHARED_BLOCKED_ENV_EXACT vars regardless of allowedEnvExact', () => { // Even if someone accidentally adds a blocked key to the engine allowlist, diff --git a/tests/unit/backends/shared-envFilter.test.ts b/tests/unit/backends/shared-envFilter.test.ts index 13f31a33c..af7e705dd 100644 --- a/tests/unit/backends/shared-envFilter.test.ts +++ b/tests/unit/backends/shared-envFilter.test.ts @@ -178,6 +178,34 @@ describe('SHARED_ALLOWED_ENV_EXACT', () => { expect(result[GITHUB_ACK_COMMENT_ID_ENV_VAR]).toBe('12345'); }); + // MNG-1055: the worker image bakes a shared Playwright Chromium cache at + // /ms-playwright and exports PLAYWRIGHT_BROWSERS_PATH=/ms-playwright. The + // native-tool env filter must forward this single exact variable so + // `playwright launch chromium` from agent shells uses the prebuilt cache. + // The broader `PLAYWRIGHT_*` prefix is deliberately NOT allowlisted — we + // pass exactly the browser cache path and nothing else. + describe('PLAYWRIGHT_BROWSERS_PATH allowlist (MNG-1055)', () => { + it('is in SHARED_ALLOWED_ENV_EXACT', () => { + expect(SHARED_ALLOWED_ENV_EXACT.has('PLAYWRIGHT_BROWSERS_PATH')).toBe(true); + }); + + it('survives filterProcessEnv round-trip', () => { + const result = filterProcessEnv({ PLAYWRIGHT_BROWSERS_PATH: '/ms-playwright' }); + expect(result.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + }); + + it('does not silently allow other PLAYWRIGHT_* variables', () => { + const result = filterProcessEnv({ + PLAYWRIGHT_BROWSERS_PATH: '/ms-playwright', + PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: '1', + PLAYWRIGHT_TEST_BASE_URL: 'https://example.com', + }); + expect(result.PLAYWRIGHT_BROWSERS_PATH).toBe('/ms-playwright'); + expect(result.PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD).toBeUndefined(); + expect(result.PLAYWRIGHT_TEST_BASE_URL).toBeUndefined(); + }); + }); + // Regression net for prod incidents MNG-741 / MNG-736 / MNG-739 (2026-05-12): // `sidecarManager` injected `CASCADE_PM_WRITE_SIDECAR_PATH` into projectSecrets // but the allowlist here dropped it on the way to the subprocess, so the diff --git a/tests/unit/backends/shared-nativeToolPrompts.test.ts b/tests/unit/backends/shared-nativeToolPrompts.test.ts index 16e6daac4..8ff792d8e 100644 --- a/tests/unit/backends/shared-nativeToolPrompts.test.ts +++ b/tests/unit/backends/shared-nativeToolPrompts.test.ts @@ -478,6 +478,61 @@ describe('buildSystemPrompt', () => { const result = buildSystemPrompt('Agent prompt.', []); expect(result).toContain('Never write pseudo tool calls'); }); + + // MNG-1055: the worker image guarantees a baseline of native-session + // tools — Python shim, jq/rg/fd/git/tmux/cascade-tools, and a shared + // Playwright Chromium cache at $PLAYWRIGHT_BROWSERS_PATH. The system + // prompt must communicate that contract so agents reach for these + // directly instead of trying to install or work around them. Pinned + // here so future trim-the-prompt edits do not silently drop the + // guarantees that the friction clusters (MNG-887…1044, MNG-998, + // MNG-1048) were originally filed about. + describe('runtime-tool guarantees (MNG-1055)', () => { + it('lists Python shim guarantee with both python and python3 names', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('Guaranteed runtime tools'); + expect(result).toContain('`python`'); + expect(result).toContain('`python3`'); + }); + + it('lists the other baseline shell tools agents should reach for', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('`jq`'); + expect(result).toContain('`rg`'); + expect(result).toContain('`fd`'); + expect(result).toContain('`git`'); + expect(result).toContain('`tmux`'); + expect(result).toContain('`cascade-tools`'); + }); + + it('points at the shared Playwright Chromium cache via PLAYWRIGHT_BROWSERS_PATH', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('Playwright'); + expect(result).toContain('$PLAYWRIGHT_BROWSERS_PATH'); + }); + + it('runtime guarantees render even when no cascade-tools are wired', () => { + // The guarantees are part of the static execution rules, not the + // CASCADE Tools section — so they render whether or not the + // caller passes a tool manifest. This keeps the contract visible + // for engines that mount zero cascade-tools (early debug runs). + const noTools = buildSystemPrompt('Agent prompt.', []); + const withTools = buildSystemPrompt('Agent prompt.', [makeManifest()]); + expect(noTools).toContain('Guaranteed runtime tools'); + expect(withTools).toContain('Guaranteed runtime tools'); + }); + + it('does not alter the rendered cascade-tools CLI documentation', () => { + // Pins that the prompt addition is purely additive — the + // CreatePRReview / ReadWorkItem command bodies the generator + // emits are unchanged. Catches accidental reorderings that + // would push agents back to the pre-014 pseudo-tool surface. + const result = buildSystemPrompt('Agent prompt.', [makeManifest({ name: 'ReadWorkItem' })]); + expect(result).toContain('### ReadWorkItem'); + expect(result).toContain('cascade-tools pm read-work-item'); + expect(result).toContain('--workItemId '); + }); + }); }); // ───────── buildTaskPrompt ───────── From 3043f771bb72281a795d66d5a61af11a0fa37050 Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Mon, 18 May 2026 19:27:08 +0000 Subject: [PATCH 2/4] fix(cascade-tools): harden multiline text and large diff I/O for shell agents --- CHANGELOG.md | 6 + CLAUDE.md | 4 +- docs/architecture/07-gadgets.md | 8 + src/agents/contracts/index.ts | 15 + src/agents/prompts/templates/review.eta | 4 +- src/agents/shared/prFormatting.ts | 6 + src/backends/shared/nativeToolPrompts.ts | 43 +++ src/cli/scm/get-pr-diff.ts | 1 + src/gadgets/README.md | 14 + src/gadgets/github/core/getPRDiff.ts | 139 +++++++-- src/gadgets/github/definitions.ts | 37 ++- src/gadgets/pm/definitions.ts | 12 +- src/gadgets/shared/cli/params.ts | 36 +++ src/gadgets/shared/cliCommandFactory.ts | 12 +- src/gadgets/shared/gadgetFactory.ts | 6 +- src/gadgets/shared/manifestGenerator.ts | 16 ++ src/gadgets/shared/toolDefinition.ts | 11 + .../backends/shared-nativeToolPrompts.test.ts | 154 ++++++++++ tests/unit/backends/toolManifests.test.ts | 5 +- tests/unit/cli/file-input-flags.test.ts | 56 ++++ tests/unit/cli/scm/scm-commands.test.ts | 43 ++- .../gadgets/github/core/getPRDiff.test.ts | 264 +++++++++++++++++- tests/unit/gadgets/shared/cli/params.test.ts | 134 ++++++++- tests/unit/gadgets/shared/factories.test.ts | 237 ++++++++++++++++ 24 files changed, 1218 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00df6bd35..4916475c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable user-visible changes to CASCADE are documented here. The format is l ## Unreleased +### Fixed + +- **`cascade-tools` multiline text and large diff I/O are now hardened against shell-quoting footguns and stdout truncation** ([MNG-1059](https://linear.app/issue/MNG-1059)). The shared CLI factory at `src/gadgets/shared/cli/params.ts` now rejects invocations that pass `--*-file -` for two or more file-input flags (e.g. `--body-file - --comments-file -`) before any `readFileSync(0, ...)` call — stdin (fd 0) can only be drained once per process, and the previous behavior silently truncated one of the two agent payloads. The rejection emits a structured `flag-parse` error envelope (`error.flag: "body-file,comments-file"`, `hint: "Pass at most one --*-file -; for the others, write the payload to a temp file and pass ---file ."`) so agents can self-correct on the next attempt. Direct file paths remain pairwise-compatible — `--body-file - --comments-file /tmp/comments.json` and `--body-file /tmp/body.md --comments-file -` both work as before. The native-tool system prompt now renders a "cascade-tools shell-safety rules" section that documents the one-stdin-consumer invariant and provides safe heredoc / temp-file patterns for one and two payloads. The prompt renderer also suppresses inline `--body '...'` / `--text '...'` examples whose content contains backticks, code fences, `$(...)`, or newlines when a file-input companion is declared, redirecting the agent at the safer `--*-file ` form instead. File-input flag descriptions for `--body-file`, `--text-file`, `--description-file`, `--details-file`, and `--comments-file` explicitly call out markdown / multiline / backticks. Closes [MNG-908](https://linear.app/mongrel/issue/MNG-908), [MNG-910](https://linear.app/mongrel/issue/MNG-910), [MNG-917](https://linear.app/mongrel/issue/MNG-917), [MNG-1046](https://linear.app/mongrel/issue/MNG-1046). + +- **`cascade-tools scm get-pr-diff` gains an `--outputFile ` escape hatch for large diffs and one-line JSON patches** ([MNG-1045](https://linear.app/mongrel/issue/MNG-1045)). When `--outputFile` is set, the full multiline Markdown diff is written to the requested path on disk and stdout returns a compact JSON summary `{outputFile, fileCount, bytes, pathFilter}` instead of the raw payload — sidestepping terminal-truncation issues with hundreds-of-kilobytes one-line JSON diffs. Default behavior is preserved: without `--outputFile`, `get-pr-diff` returns the formatted Markdown directly. The review-agent skipped-files guidance now points operators at this form (`cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/diff-.md`) when a file would otherwise truncate. The `--outputFile` flag is declared as `cliOnly: true` on the underlying ToolDefinition so it appears in the CLI + agent-facing manifest but is excluded from the SDK Gadget Zod schema (gadgets return strings in-process and cannot deliver a file path back through that contract). + ### Changed - **Worker image now ships a Python shim and a shared Playwright Chromium cache as native-session baseline tools** ([MNG-1055](https://linear.app/issue/MNG-1055)). `Dockerfile.worker` installs `python3` + `python-is-python3` so both `python` and `python3` resolve to the same Debian-owned interpreter (closing the friction cluster MNG-887/897/926/934/947/957/973/1010/1024/1033/1039/1044). It also installs a pinned `@playwright/test` plus Chromium with browser dependencies into `/ms-playwright` (closing MNG-998/1048), then makes that cache owned by the runtime `node` user so project `.cascade/setup.sh` scripts pinned to a different Playwright revision can install the missing Chromium revision into the inherited `$PLAYWRIGHT_BROWSERS_PATH`. The native-tool env filter (`src/backends/shared/envFilter.ts`) now allowlists `PLAYWRIGHT_BROWSERS_PATH` as an exact match so the cache is reachable from every native-tool engine subprocess; the broader `PLAYWRIGHT_*` prefix is intentionally left out to preserve the defense-in-depth env-allowlist posture. A new Docker smoke script (`tests/docker/worker-runtime-tools/run-test.sh`) validates `python`, `python3`, `python -c 'import json'`, the Playwright Chromium launch path, the env var, and node-user write access to the cache, and is wired into CI (`docker-build-check`) and both deploy workflows (`deploy.yml` / `deploy-dev.yml`) **before** the worker image is pushed, so a broken baseline cannot reach `:latest` / `:dev` tags. The native-tool system prompt (`src/backends/shared/nativeToolPrompts.ts`) exposes the guaranteed tools to agents under a new "Guaranteed runtime tools" section, and the engine-backends architecture doc, README, and Getting Started prerequisites describe the runtime baseline (including the image-size implication). diff --git a/CLAUDE.md b/CLAUDE.md index c4e4b9c42..d3fff389b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -148,10 +148,12 @@ The wedged-lock canary should never fire under normal operation. Its presence in Review agent receives a **compact per-file diff context**, not full file contents. Each changed file is a `### (, +N -M)` section with a unified diff hunk. Budget: `REVIEW_DIFF_CONTEXT_TOKEN_LIMIT` = 200k tokens, per-file cap 10%. -GitHub's changed-file API is used for file enumeration and change counts, but compact patch bodies come from the checked-out PR workspace via `git diff origin/...HEAD`. Files that can't fit or can't be locally verified (deleted, binary/no text patch, local diff failure/empty patch, oversized patch, or budget exhausted) are injected as `SKIPPED FILES` with instructions to fetch on demand via `cascade-tools scm get-pr-diff --prNumber --path `, `Read`, or `Grep`. +GitHub's changed-file API is used for file enumeration and change counts, but compact patch bodies come from the checked-out PR workspace via `git diff origin/...HEAD`. Files that can't fit or can't be locally verified (deleted, binary/no text patch, local diff failure/empty patch, oversized patch, or budget exhausted) are injected as `SKIPPED FILES` with instructions to fetch on demand via `cascade-tools scm get-pr-diff --prNumber --path `, `Read`, or `Grep`. Large or one-line JSON diffs that would truncate stdout should add `--outputFile /tmp/diff-.md` — the CLI writes the full multiline Markdown payload to disk and returns a compact `{outputFile, fileCount, bytes, pathFilter}` summary on stdout (MNG-1059 / MNG-1045). When review output misses something, check the `PR context prepared` log entry for `included` / `skipped` / `skipReasons`, `patchSources`, `totalDiffTokens`, `perFileTokenCap`, and `localGitMismatches` to confirm whether the file was visible to the agent and whether GitHub's API patch differed from the local patch. Also check context offload logs if the diff context was written under `.cascade/context/`. +**cascade-tools shell-safety contract** — MNG-1059. cascade-tools commands that accept markdown/multiline payloads (`--body`, `--text`, `--description`, `--details`, `--comments`) declare a `--*-file ` companion via `cli.fileInputAlternatives`. Agents are instructed in the system prompt to prefer the file form when content contains backticks, code fences, `$(...)`, or newlines — shells expand those tokens even inside single quotes once they layer through `bash -c`, and newlines break argv parsing. The shared CLI factory at `src/gadgets/shared/cli/params.ts:rejectMultipleStdinConsumers` enforces the single-stdin-consumer invariant: only one `--*-file -` per command. Passing two stdin consumers (e.g. `--body-file - --comments-file -`) returns a structured `flag-parse` envelope with `error.flag: "body-file,comments-file"` and a hint to write one payload to a temp file — *before* any `readFileSync(0, ...)` call. The native-tool system prompt also renders a "cascade-tools shell-safety rules" section with safe heredoc / temp-file patterns. Prompt example rendering suppresses inline `--body '...'` examples for shell-sensitive content (backticks / `$(...)` / newlines) when a file-input companion exists, redirecting agents at the safer `--*-file ` form. + ## Engines Default engine: `claude-code`. Alternatives: `codex`, `opencode`. diff --git a/docs/architecture/07-gadgets.md b/docs/architecture/07-gadgets.md index db0479424..d9e5be336 100644 --- a/docs/architecture/07-gadgets.md +++ b/docs/architecture/07-gadgets.md @@ -148,6 +148,14 @@ New domain commands should not add branches in these helpers. They declare behav Core functions passed to `createCLICommand()` own domain work only. On fatal runtime/API/provider failures they throw, and the shared factory converts that exception into the structured `{"success":false,"error":{"type":"runtime","message":"..."}}` stdout envelope plus exit code 1. A returned value is always serialized as successful `data`, so gadgets must not return sentinel error strings such as `Error reading work item: ...` for fatal failures. Non-fatal command states that are part of the contract, such as guarded PM move no-ops or friction retry queueing, remain successful returns. +### Shell-safety contract (MNG-1059) + +cascade-tools commands that accept text bodies, descriptions, or markdown payloads declare a `--*-file ` companion via `cli.fileInputAlternatives` (`--body-file`, `--text-file`, `--description-file`, `--details-file`, `--comments-file`). Agents are instructed to prefer the file form for any content containing backticks, code fences, `$(...)`, or newlines — shells expand those tokens even inside single quotes when the command is layered through `bash -c`, and newlines break argv parsing. + +**Single-stdin-consumer invariant.** stdin (fd 0) can only be drained once per process. The shared CLI factory at `src/gadgets/shared/cli/params.ts` (`rejectMultipleStdinConsumers`) scans file-input flags for the literal `-` value and rejects any invocation with two or more stdin consumers — *before* any `readFileSync(0, ...)` call. The rejection emits a structured `flag-parse` error envelope so the agent can self-correct on the next attempt (write one payload to a temp file via `--*-file ` and stream the other via `--*-file -`). Direct file paths remain pairwise-compatible; only the dual-stdin combination is blocked. + +**Large-diff escape hatch.** `cascade-tools scm get-pr-diff` accepts an optional `--outputFile ` flag (`cliOnly: true`). When set, the full multiline Markdown diff is written to disk and stdout contains only a compact summary `{outputFile, fileCount, bytes, pathFilter}`. This sidesteps terminal-truncation issues with one-line JSON patches that can be hundreds of kilobytes (see MNG-1045). Default behavior is preserved: without `--outputFile`, `get-pr-diff` returns the formatted Markdown directly. + ## Session State `src/gadgets/sessionState.ts` diff --git a/src/agents/contracts/index.ts b/src/agents/contracts/index.ts index 990596352..e3ff5af59 100644 --- a/src/agents/contracts/index.ts +++ b/src/agents/contracts/index.ts @@ -49,6 +49,21 @@ export interface ToolManifestParameter { * prompt renderer and CLI help to show agents a runnable shape. */ example?: unknown; + /** + * MNG-1059: the canonical text parameter name this file-input flag is an + * alternative for. Present only on the synthesized `--*-file` manifest + * entries (e.g. `body-file` carries `fileInputFor: 'body'`). The prompt + * renderer uses this to point agents from a shell-sensitive direct flag at + * its safer companion. + */ + fileInputFor?: string; + /** + * MNG-1059: the file-input flag name agents should prefer when the payload + * for this direct parameter contains markdown, multiline text, backticks, + * code fences, `$(...)`, or other shell-sensitive tokens. Present on the + * direct text parameter (e.g. `body` carries `fileInputAlternative: 'body-file'`). + */ + fileInputAlternative?: string; } /** diff --git a/src/agents/prompts/templates/review.eta b/src/agents/prompts/templates/review.eta index 55583b973..e89b818f8 100644 --- a/src/agents/prompts/templates/review.eta +++ b/src/agents/prompts/templates/review.eta @@ -198,7 +198,9 @@ Use CreatePRReview with: - `body`: Summary of findings (or "LGTM" if no issues) - `comments`: Inline comments for specific issues -**CRITICAL**: Inline comments can ONLY be placed on files in the PR diff. If you comment on a file path not in the diff, or use line numbers outside the changed ranges, the entire review will fail. Only add inline comments for files you saw in GetPRDiffContext output or files you verified by fetching a skipped file on demand via `cascade-tools scm get-pr-diff --path`. +**CRITICAL**: Inline comments can ONLY be placed on files in the PR diff. If you comment on a file path not in the diff, or use line numbers outside the changed ranges, the entire review will fail. Only add inline comments for files you saw in GetPRDiffContext output or files you verified by fetching a skipped file on demand via `cascade-tools scm get-pr-diff --path`. For very large or one-line JSON diffs that would truncate stdout, add `--outputFile /tmp/diff-.md` and then `Read` that file — the CLI returns a compact summary while the full diff stays on disk. + +**Submitting markdown review bodies with backticks, code fences, or multi-line content**: pass `--body-file ` (or `--body-file -` heredoc) instead of inline `--body '...'` — shells expand backticks and `$(...)` even inside quotes, and newlines fight argv parsing. If you also need to pass `--comments` JSON, write the body to a temp file and stream the comments via stdin: `--body-file /tmp/body.md --comments-file -` (only one `--*-file -` per command — stdin can only be drained once). ### Review Body Format diff --git a/src/agents/shared/prFormatting.ts b/src/agents/shared/prFormatting.ts index f9d755670..dd20af220 100644 --- a/src/agents/shared/prFormatting.ts +++ b/src/agents/shared/prFormatting.ts @@ -255,6 +255,12 @@ export function formatSkippedFilesInjection( prNumber !== undefined ? ` • \`cascade-tools scm get-pr-diff --prNumber ${prNumber} --path \` to read the patch` : ' • `cascade-tools scm get-pr-diff --prNumber --path ` to read the patch', + // MNG-1059: large diffs / one-line JSON patches truncate stdout. Tell + // the agent about the file-output escape hatch so it does not lose + // content to terminal truncation. + prNumber !== undefined + ? ` • \`cascade-tools scm get-pr-diff --prNumber ${prNumber} --path --outputFile /tmp/diff-.md\` for large or one-line JSON diffs, then \`Read /tmp/diff-.md\`` + : ' • `cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/diff-.md` for large or one-line JSON diffs, then `Read /tmp/diff-.md`', ' • `Read ` to read the post-PR file content', ' • `Grep ` to search inside the file', '', diff --git a/src/backends/shared/nativeToolPrompts.ts b/src/backends/shared/nativeToolPrompts.ts index 163db6430..7fec53ee0 100644 --- a/src/backends/shared/nativeToolPrompts.ts +++ b/src/backends/shared/nativeToolPrompts.ts @@ -15,6 +15,13 @@ You are operating in a native-tool environment, not a gadget/function-call envir - If you catch yourself composing a pseudo tool call in plain text, stop and use the real tool instead. - Trello, JIRA, and GitHub attachment URLs require backend authentication. NEVER curl, wget, or HTTP-fetch them — they return an authorization error. Work item images are pre-fetched and available either as images in your conversation context or as files under \`.cascade/context/images/\` — use whichever is present; never fetch the original URLs. +### cascade-tools shell-safety rules + +- For markdown, multiline text, backticks (\`\`\` \` \`\`\`), code fences (\`\`\` \`\`\`\`\`\`), \`$(...)\`, \`\\\`...\\\`\`, or other shell-sensitive content, prefer the \`--*-file \` form (e.g. \`--body-file /tmp/body.md\`) or a single heredoc via \`--*-file -\` over inline \`--body '...'\` / \`--text '...'\` / \`--description '...'\` arguments. Shells expand backticks and \`$(...)\` even inside single quotes when nested inside double quotes, and newlines fight your shell's argv parser. +- Only **one** \`--*-file -\` flag is allowed per command — stdin (fd 0) can only be drained once. Pass at most one payload through stdin; write the others to temp files and reference them with \`--*-file \`. The CLI rejects multiple \`-\` consumers with a structured \`flag-parse\` error before any read occurs. +- Pattern for a single heredoc: \`cascade-tools scm post-pr-comment --prNumber 42 --body-file - <<'EOF' \\n ...markdown... \\nEOF\`. +- Pattern for two payloads: write one to a temp file first, then pass the other via stdin. Example: \`cat >/tmp/body.md <<'EOF' \\n ...markdown body... \\nEOF\` followed by \`cascade-tools scm create-pr-review --prNumber 42 --event REQUEST_CHANGES --body-file /tmp/body.md --comments-file - <<'EOF' \\n [{"path":"src/x.ts","line":12,"body":"please handle null"}] \\nEOF\`. + ## Guaranteed runtime tools The worker image bakes the following baseline tools so you can rely on them in any shell command without installing anything: @@ -41,8 +48,33 @@ type PromptParamSchema = { items?: string; aliases?: readonly string[]; example?: unknown; + fileInputFor?: string; + fileInputAlternative?: string; }; +/** + * MNG-1059: a string example is "shell-sensitive" when it contains any of the + * tokens shells expand or that fight argv parsing — backticks, dollar-paren + * subshells, code fences, or any newline. When the parameter has a file-input + * alternative declared, the prompt renderer should suppress the inline example + * (which would teach the agent a footgun) and point at the safer companion. + * + * This deliberately does not flag every shell metachar (the `formatShellScalar` + * helper already wraps scalars containing spaces or other delimiters in single + * quotes). It targets the cluster surfaced in MNG-908 / MNG-910 / MNG-1046 / + * MNG-1048: markdown bodies and friction details containing backticks / code + * fences, multi-line PR descriptions, and command-substitution patterns that + * survive shell quoting in pathological ways. + */ +function isShellSensitiveExample(value: unknown): boolean { + if (typeof value !== 'string') return false; + if (value.includes('\n')) return true; + if (value.includes('`')) return true; + if (value.includes('$(') || value.includes('${')) return true; + if (value.includes('```')) return true; + return false; +} + function formatExampleInvocation(key: string, schema: PromptParamSchema): string | undefined { if (schema.example === undefined) return undefined; @@ -61,6 +93,17 @@ function formatExampleInvocation(key: string, schema: PromptParamSchema): string return examples.map((value) => `--${key} ${formatShellScalar(value)}`).join(' '); } + // MNG-1059: when a direct text param has a file-input companion and the + // example contains shell-sensitive tokens, redirect the agent at the file + // flag instead of teaching them a quoting footgun. + if ( + schema.type === 'string' && + schema.fileInputAlternative && + isShellSensitiveExample(schema.example) + ) { + return `--${schema.fileInputAlternative} # write the markdown/multiline content to a temp file (shell-sensitive: contains backticks, code fences, $(...), or newlines)`; + } + return `--${key} ${formatShellScalar(schema.example)}`; } diff --git a/src/cli/scm/get-pr-diff.ts b/src/cli/scm/get-pr-diff.ts index 7a1261e6d..3465e7fe3 100644 --- a/src/cli/scm/get-pr-diff.ts +++ b/src/cli/scm/get-pr-diff.ts @@ -8,5 +8,6 @@ export default createCLICommand(getPRDiffDef, async (params) => { params.repo as string, params.prNumber as number, params.path as string | undefined, + params.outputFile as string | undefined, ); }); diff --git a/src/gadgets/README.md b/src/gadgets/README.md index 9e1c40044..976612e0b 100644 --- a/src/gadgets/README.md +++ b/src/gadgets/README.md @@ -61,6 +61,20 @@ cli: { `-` as the file path reads from stdin. The generated flag is always optional (the direct flag remains accepted). +**One-stdin-consumer rule (MNG-1059).** stdin (fd 0) can only be drained once per process. If a command declares two file-input alternatives and the agent passes `--body-file - --comments-file -` in one invocation, the first `readFileSync(0, ...)` consumes every byte and the second consumer silently gets an empty string — one of the agent's payloads is dropped without any error. + +The shared CLI factory rejects this combination structurally *before* any read occurs: it emits a `flag-parse` envelope with `error.flag: 'body-file,comments-file'` and a hint to write one payload to a temp file. Direct file paths remain compatible — `--body-file - --comments-file /tmp/comments.json` and `--body-file /tmp/body.md --comments-file -` both work as before. The guard is automatic; gadget authors do not call it directly. + +Description text on `--*-file` flags should explicitly call out markdown / multiline / backticks / `$(...)` so the manifest renderer and `--help` output steer agents toward the file form before they hit a quoting bug. The single-stdin rule is automatically surfaced in the rendered system prompt under "cascade-tools shell-safety rules"; per-command examples should also model the safe pattern when relevant. + +### `cliOnly: true` on a parameter + +`ParameterDefinition.cliOnly` flags a parameter as a CLI-only surface — included in the CLI flags and in the agent-facing tool manifest (so the prompt shows it), but **excluded** from the Zod schema the SDK Gadget exposes. Use this for output destination flags that have no meaningful in-process equivalent. + +Reference: `getPRDiffDef.parameters.outputFile` in `src/gadgets/github/definitions.ts`. When `--outputFile ` is set, the CLI writes the full Markdown diff to disk and returns a compact summary (`{outputFile, fileCount, bytes, pathFilter}`) instead of pumping the full multi-megabyte payload through stdout. The SDK gadget would have no clean way to deliver that file back through its return-string contract, so the flag is marked `cliOnly`. + +`cliOnly` is mutually exclusive with `gadgetOnly` — the former excludes from the gadget; the latter excludes from the CLI + manifest. + ### `examples` A list of `{ params, comment, output? }` invocations. The first example that populates a given parameter becomes that parameter's **concrete example**, surfaced in three places: diff --git a/src/gadgets/github/core/getPRDiff.ts b/src/gadgets/github/core/getPRDiff.ts index 05a7bf08f..cdaa37ba7 100644 --- a/src/gadgets/github/core/getPRDiff.ts +++ b/src/gadgets/github/core/getPRDiff.ts @@ -1,35 +1,128 @@ +import { writeFileSync } from 'node:fs'; + import { githubClient } from '../../../github/client.js'; +type PRDiffFile = Awaited>[number]; + +/** + * Filter the changed-file list by an optional path. Matches either the file's + * current filename or its `previousFilename` (so renames are picked up). + */ +export function filterByPath(files: PRDiffFile[], path: string | undefined): PRDiffFile[] { + if (!path) return files; + return files.filter((f) => f.filename === path || f.previousFilename === path); +} + +/** + * Render a single changed file as a Markdown section: header + status + patch + * fenced as a `diff` block (or the binary/too-large marker when no patch). + * + * Exported for unit tests; the formatter is a pure function of the file shape. + */ +export function formatPRDiffFile(file: PRDiffFile): string { + const lines = [ + `## ${file.filename}`, + `Status: ${file.status} | +${file.additions} -${file.deletions}`, + ]; + if (file.previousFilename) { + lines.push(`Previous filename: ${file.previousFilename}`); + } + if (file.patch) { + lines.push('```diff', file.patch, '```'); + } else { + lines.push('[Binary file or too large to display]'); + } + return lines.join('\n'); +} + +/** + * Compose the full Markdown diff response from a (possibly path-filtered) file + * list. Returns the "empty matches" sentinel when the list is empty so callers + * can pass it straight through. + */ +export function formatPRDiffPayload(files: PRDiffFile[], path: string | undefined): string { + if (files.length === 0) { + return path ? `No changed file matched path: ${path}` : 'No files changed in this PR.'; + } + const formatted = files.map((f) => formatPRDiffFile(f)); + return `${files.length} file(s) changed:\n\n${formatted.join('\n\n')}`; +} + +/** + * MNG-1059: the compact JSON summary returned when `--output-file` is set. + * Keeps the raw multi-megabyte diff text off stdout (terminal truncation, + * agent context bloat) while preserving the cascade-tools JSON contract. + */ +export interface PRDiffFileOutputSummary { + outputFile: string; + fileCount: number; + bytes: number; + pathFilter?: string; +} + +/** + * Default mode (`outputFile` unset): returns the formatted Markdown diff string, + * or an `Error fetching PR diff: ` sentinel when the GitHub call throws — + * preserved for backwards compatibility with existing callers and tests. + * + * File-output mode (`outputFile` set, MNG-1059): writes the formatted Markdown + * to the requested path and returns a compact summary instead of the full + * payload. In this mode, runtime errors are thrown so the CLI factory's + * `runtime` envelope path surfaces them with structure agents can act on. + * + * Overloaded so callers that omit `outputFile` (the SDK gadget path) see a + * `Promise` return type, while callers that pass `outputFile` get + * `Promise`. + */ +export function getPRDiff( + owner: string, + repo: string, + prNumber: number, + path?: string, +): Promise; +export function getPRDiff( + owner: string, + repo: string, + prNumber: number, + path: string | undefined, + outputFile: string, +): Promise; +export function getPRDiff( + owner: string, + repo: string, + prNumber: number, + path?: string, + outputFile?: string, +): Promise; export async function getPRDiff( owner: string, repo: string, prNumber: number, path?: string, -): Promise { + outputFile?: string, +): Promise { + if (outputFile) { + // MNG-1059: file-output mode follows the README convention — fatal + // failures throw so the CLI factory wraps them in the structured + // `runtime` error envelope. Returning a sentinel here would conflict + // with the summary-object return shape. + const files = await githubClient.getPRDiff(owner, repo, prNumber); + const filteredFiles = filterByPath(files, path); + const payload = formatPRDiffPayload(filteredFiles, path); + writeFileSync(outputFile, payload, 'utf-8'); + const summary: PRDiffFileOutputSummary = { + outputFile, + fileCount: filteredFiles.length, + bytes: Buffer.byteLength(payload, 'utf-8'), + }; + if (path !== undefined) summary.pathFilter = path; + return summary; + } + try { const files = await githubClient.getPRDiff(owner, repo, prNumber); - const filteredFiles = path - ? files.filter((f) => f.filename === path || f.previousFilename === path) - : files; - - if (filteredFiles.length === 0) { - return path ? `No changed file matched path: ${path}` : 'No files changed in this PR.'; - } - - const formatted = filteredFiles.map((f) => { - const lines = [`## ${f.filename}`, `Status: ${f.status} | +${f.additions} -${f.deletions}`]; - if (f.previousFilename) { - lines.push(`Previous filename: ${f.previousFilename}`); - } - if (f.patch) { - lines.push('```diff', f.patch, '```'); - } else { - lines.push('[Binary file or too large to display]'); - } - return lines.join('\n'); - }); - - return `${filteredFiles.length} file(s) changed:\n\n${formatted.join('\n\n')}`; + const filteredFiles = filterByPath(files, path); + return formatPRDiffPayload(filteredFiles, path); } catch (error) { const message = error instanceof Error ? error.message : String(error); return `Error fetching PR diff: ${message}`; diff --git a/src/gadgets/github/definitions.ts b/src/gadgets/github/definitions.ts index 36b8ebc56..5a456df77 100644 --- a/src/gadgets/github/definitions.ts +++ b/src/gadgets/github/definitions.ts @@ -148,7 +148,8 @@ If hooks fail, the full output will be shown.`, { paramName: 'body', fileFlag: 'body-file', - description: 'Read PR body from file (use - for stdin)', + description: + 'Read PR body from file (use - for stdin). Strongly preferred over --body for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -240,14 +241,15 @@ export const createPRReviewDef: ToolDefinition = { { paramName: 'body', fileFlag: 'body-file', - description: 'Read review body from file (use - for stdin)', + description: + 'Read review body from file (use - for stdin). Strongly preferred over --body for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, { paramName: 'comments', fileFlag: 'comments-file', parseAs: 'json', description: - 'Read --comments JSON from file (use - for stdin). Prefer this for long payloads.', + 'Read --comments JSON from file (use - for stdin). Prefer this for long JSON payloads. Note: cannot pair `--body-file -` AND `--comments-file -` in one invocation — stdin is single-consumer; write one payload to a temp file and pass --*-file .', }, ], }, @@ -302,7 +304,7 @@ export const getPRDetailsDef: ToolDefinition = { export const getPRDiffDef: ToolDefinition = { name: 'GetPRDiff', description: - 'Get the unified diff of all file changes in a GitHub pull request. Shows each file with additions, deletions, and the patch content.', + 'Get the unified diff of all file changes in a GitHub pull request. Shows each file with additions, deletions, and the patch content. For large diffs or one-line JSON patches that would truncate stdout, use --output-file to write the full diff to disk and return a compact summary.', timeoutMs: 30000, parameters: { comment: { @@ -334,6 +336,13 @@ export const getPRDiffDef: ToolDefinition = { 'Optional changed-file path to fetch. Matches either the current filename or previous filename for renames.', optional: true, }, + outputFile: { + type: 'string', + describe: + 'Optional path: when set, write the raw multiline diff text to this file and return a compact JSON summary {outputFile, fileCount, bytes, pathFilter} instead of the full payload. Use this for large diffs / one-line JSON patches that would truncate stdout.', + optional: true, + cliOnly: true, + }, }, examples: [ { @@ -345,6 +354,17 @@ export const getPRDiffDef: ToolDefinition = { }, comment: 'Get all file changes in PR #42', }, + { + params: { + comment: 'Reviewing a large single-file diff that would truncate stdout', + owner: 'acme', + repo: 'myapp', + prNumber: 42, + path: 'src/big-generated.json', + outputFile: '/tmp/pr-42-diff.md', + }, + comment: 'Stream a large diff to disk and read it on demand', + }, ], cli: { autoResolved: ownerRepoAutoResolved, @@ -496,7 +516,8 @@ export const postPRCommentDef: ToolDefinition = { { paramName: 'body', fileFlag: 'body-file', - description: 'Read comment body from file (use - for stdin)', + description: + 'Read comment body from file (use - for stdin). Strongly preferred over --body for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -555,7 +576,8 @@ export const updatePRCommentDef: ToolDefinition = { { paramName: 'body', fileFlag: 'body-file', - description: 'Read comment body from file (use - for stdin)', + description: + 'Read comment body from file (use - for stdin). Strongly preferred over --body for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -620,7 +642,8 @@ export const replyToReviewCommentDef: ToolDefinition = { { paramName: 'body', fileFlag: 'body-file', - description: 'Read reply body from file (use - for stdin)', + description: + 'Read reply body from file (use - for stdin). Strongly preferred over --body for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, diff --git a/src/gadgets/pm/definitions.ts b/src/gadgets/pm/definitions.ts index 0564a9a57..ff6017f35 100644 --- a/src/gadgets/pm/definitions.ts +++ b/src/gadgets/pm/definitions.ts @@ -67,7 +67,8 @@ export const postCommentDef: ToolDefinition = { { paramName: 'text', fileFlag: 'text-file', - description: 'Read comment text from file (use - for stdin)', + description: + 'Read comment text from file (use - for stdin). Strongly preferred over --text for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -115,7 +116,8 @@ export const updateWorkItemDef: ToolDefinition = { { paramName: 'description', fileFlag: 'description-file', - description: 'Read description from file (use - for stdin)', + description: + 'Read description from file (use - for stdin). Strongly preferred over --description for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -159,7 +161,8 @@ export const createWorkItemDef: ToolDefinition = { { paramName: 'description', fileFlag: 'description-file', - description: 'Read description from file (use - for stdin)', + description: + 'Read description from file (use - for stdin). Strongly preferred over --description for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, @@ -216,7 +219,8 @@ export const reportFrictionDef: ToolDefinition = { { paramName: 'details', fileFlag: 'details-file', - description: 'Read friction details from file (use - for stdin)', + description: + 'Read friction details from file (use - for stdin). Strongly preferred over --details for markdown / multiline content with backticks, code fences, $(...) or newlines.', }, ], }, diff --git a/src/gadgets/shared/cli/params.ts b/src/gadgets/shared/cli/params.ts index 970f4322c..088680a89 100644 --- a/src/gadgets/shared/cli/params.ts +++ b/src/gadgets/shared/cli/params.ts @@ -16,6 +16,42 @@ function readFileInput(fileFlagValue: string): string { return fileFlagValue === '-' ? readFileSync(0, 'utf-8') : readFileSync(fileFlagValue, 'utf-8'); } +/** + * Spec MNG-1059: stdin (fd 0) can only be drained once per process. When a + * cascade-tools call passes `-` as the path for two or more file-input flags + * (e.g. `--body-file - --comments-file -`), the first `readFileSync(0, ...)` + * consumes every byte of stdin and the second consumer reads an empty string — + * silently truncating one of the agent's payloads. Detect that combination + * before any read occurs and emit a structured `flag-parse` envelope the agent + * can self-correct from on the next attempt. + * + * Returns `never` (via `emitCliError`) when two or more file-input flags are + * set to `-`; returns normally when at most one is. + */ +export function rejectMultipleStdinConsumers( + fileInputAlts: readonly FileInputAlternative[], + flags: ParsedFlags, + sink: ErrorSink, +): void { + const stdinFlags = fileInputAlts + .map((a) => a.fileFlag) + .filter((fileFlag) => flags[fileFlag] === '-'); + + if (stdinFlags.length < 2) return; + + emitCliError({ + type: 'flag-parse', + flag: stdinFlags.join(','), + message: `Multiple file-input flags read from stdin: ${stdinFlags + .map((f) => `--${f} -`) + .join(' and ')}. stdin can only be drained once per process.`, + hint: `Pass at most one --*-file -; for the others, write the payload to a temp file and pass ---file .`, + stdout: sink.stdout, + stderr: sink.stderr, + exit: sink.exit, + }); +} + function parseJsonOrError( raw: string, flag: string, diff --git a/src/gadgets/shared/cliCommandFactory.ts b/src/gadgets/shared/cliCommandFactory.ts index 548ae3727..cf349963c 100644 --- a/src/gadgets/shared/cliCommandFactory.ts +++ b/src/gadgets/shared/cliCommandFactory.ts @@ -15,7 +15,11 @@ import { deriveCLICommand } from './cli/commandNames.js'; import { buildSink } from './cli/errorSink.js'; import { buildOclifExamples } from './cli/examples.js'; import { buildFlagsRecord, collectBooleanFlagNames, collectCandidateFlags } from './cli/flags.js'; -import { resolveDirectParams, resolveGitRemoteParams } from './cli/params.js'; +import { + rejectMultipleStdinConsumers, + resolveDirectParams, + resolveGitRemoteParams, +} from './cli/params.js'; import { classifyParseError, isNonexistentFlagError, suggestFlag } from './cli/parseErrors.js'; import type { ParsedFlags } from './cli/types.js'; import { emitCliError } from './errorEnvelope.js'; @@ -128,6 +132,12 @@ export function createCLICommand( throw err; } const parsedFlags = flags as ParsedFlags; + + // Reject `--*-file -` for two or more file-input flags *before* the + // first stdin read, otherwise the first consumer drains fd 0 and the + // second silently gets an empty payload (MNG-1059). + rejectMultipleStdinConsumers(fileInputAlts, parsedFlags, sink); + const resolvedParams = resolveDirectParams( def, parsedFlags, diff --git a/src/gadgets/shared/gadgetFactory.ts b/src/gadgets/shared/gadgetFactory.ts index a282acb68..647865e53 100644 --- a/src/gadgets/shared/gadgetFactory.ts +++ b/src/gadgets/shared/gadgetFactory.ts @@ -107,12 +107,16 @@ function buildZodField(def: ParameterDefinition) { /** * Convert a ParameterMap to a Zod object schema. - * Gadget schemas include ALL parameters (including gadgetOnly params like `comment`). + * Gadget schemas include `gadgetOnly` params (like `comment`) but EXCLUDE + * `cliOnly` params (CLI-only output destination flags such as + * `get-pr-diff --output-file`) — those have no meaningful in-process semantics. */ export function buildZodSchema(parameters: ParameterMap) { const shape: Record> = {}; for (const [name, def] of Object.entries(parameters)) { + // MNG-1059: cliOnly params are not part of the SDK gadget surface. + if (def.cliOnly) continue; shape[name] = buildZodField(def); } diff --git a/src/gadgets/shared/manifestGenerator.ts b/src/gadgets/shared/manifestGenerator.ts index 1e2cd8faf..4f461000a 100644 --- a/src/gadgets/shared/manifestGenerator.ts +++ b/src/gadgets/shared/manifestGenerator.ts @@ -98,6 +98,12 @@ export function generateToolManifest( ): ToolManifest { const parameters: Record = {}; + // MNG-1059: build a quick lookup of paramName → fileFlag so the manifest + // can tell the prompt renderer "this direct param has a safer file companion." + const fileInputAltMap = new Map( + (def.cli?.fileInputAlternatives ?? []).map((a) => [a.paramName, a.fileFlag]), + ); + for (const [name, paramDef] of Object.entries(def.parameters)) { // Skip gadgetOnly params if (paramDef.gadgetOnly) continue; @@ -109,6 +115,13 @@ export function generateToolManifest( if (example !== undefined) { entry.example = example; } + // MNG-1059: point this direct text/array-of-object param at its + // safer file companion so the prompt renderer can steer agents away + // from shell-sensitive inline values. + const safeCompanion = fileInputAltMap.get(name); + if (safeCompanion) { + entry.fileInputAlternative = safeCompanion; + } parameters[name] = entry; } } @@ -122,6 +135,9 @@ export function generateToolManifest( parameters[alt.fileFlag] = { type: 'string', description, + // MNG-1059: cross-reference back to the direct text param so the + // prompt renderer can group `--body` and `--body-file` semantically. + fileInputFor: alt.paramName, // File flags are always optional (they are alternatives to the direct param) }; } diff --git a/src/gadgets/shared/toolDefinition.ts b/src/gadgets/shared/toolDefinition.ts index 3f3f64bad..10efdab9f 100644 --- a/src/gadgets/shared/toolDefinition.ts +++ b/src/gadgets/shared/toolDefinition.ts @@ -108,6 +108,17 @@ interface BaseParameterDefinition { * fields) and should NOT appear in the CLI flags or JSON Schema manifest. */ gadgetOnly?: boolean; + /** + * If `true`, this parameter is exposed only via the CLI surface (e.g. output + * destination flags like `--output-file` that have no in-process equivalent + * for an SDK gadget). The flag is included in the CLI, in the agent-facing + * tool manifest (so the prompt shows it), and in oclif help output — but is + * EXCLUDED from the Zod schema the SDK Gadget exposes. + * + * Mutually exclusive with `gadgetOnly`. Used by MNG-1059 for the + * `get-pr-diff --output-file ` escape hatch. + */ + cliOnly?: boolean; /** * CLI environment variable that can auto-populate this parameter. * Maps to `env` in oclif `Flags` definition. diff --git a/tests/unit/backends/shared-nativeToolPrompts.test.ts b/tests/unit/backends/shared-nativeToolPrompts.test.ts index 8ff792d8e..6792d71f9 100644 --- a/tests/unit/backends/shared-nativeToolPrompts.test.ts +++ b/tests/unit/backends/shared-nativeToolPrompts.test.ts @@ -433,6 +433,124 @@ describe('buildToolGuidance', () => { expect(paramLine).not.toContain('#'); }); }); + + // ------------------------------------------------------------------------- + // MNG-1059: shell-sensitive multiline example suppression + // ------------------------------------------------------------------------- + describe('formatParam — shell-sensitive example suppression (MNG-1059)', () => { + it('suppresses inline example for direct text param when example contains backticks AND a file-input companion is declared', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + body: { + type: 'string', + required: true, + example: 'Use `npm test` to verify', + fileInputAlternative: 'body-file', + }, + 'body-file': { + type: 'string', + fileInputFor: 'body', + description: 'Read body from file (use - for stdin)', + }, + }, + }), + ]); + + expect(result).not.toContain("--body 'Use `npm test` to verify'"); + expect(result).toContain('--body-file '); + expect(result).toContain('shell-sensitive'); + }); + + it('suppresses inline example for direct text param when example contains $(...) AND a file-input companion is declared', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + details: { + type: 'string', + required: true, + example: 'Detected $(whoami) running unexpectedly', + fileInputAlternative: 'details-file', + }, + 'details-file': { + type: 'string', + fileInputFor: 'details', + description: 'Read details from file (use - for stdin)', + }, + }, + }), + ]); + + expect(result).not.toContain("'Detected $(whoami)"); + expect(result).toContain('--details-file '); + }); + + it('suppresses inline example for direct text param when example contains a newline AND a file-input companion is declared', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + body: { + type: 'string', + required: true, + example: '## Summary\n\nMultiline content', + fileInputAlternative: 'body-file', + }, + 'body-file': { + type: 'string', + fileInputFor: 'body', + description: 'Read body from file (use - for stdin)', + }, + }, + }), + ]); + + expect(result).not.toContain("'## Summary"); + expect(result).toContain('--body-file '); + }); + + it('keeps inline example for shell-safe scalar values even when a file companion exists', () => { + const result = buildToolGuidance([ + makeManifest({ + parameters: { + body: { + type: 'string', + required: true, + example: 'LGTM', + fileInputAlternative: 'body-file', + }, + 'body-file': { + type: 'string', + fileInputFor: 'body', + description: 'Read body from file (use - for stdin)', + }, + }, + }), + ]); + + // LGTM is shell-safe — renderer keeps it inline (bare, no quotes). + expect(result).toContain('# example: --body LGTM'); + expect(result).not.toContain('--body-file #'); + }); + + it('keeps inline example for shell-sensitive content when NO file-input companion is declared', () => { + // Without a fileInputAlternative the renderer has no safer flag to + // redirect to — preserve the original behavior so existing gadgets + // without a file companion still render their examples. + const result = buildToolGuidance([ + makeManifest({ + parameters: { + body: { + type: 'string', + required: true, + example: 'Use `code` here', + }, + }, + }), + ]); + + expect(result).toContain("--body 'Use `code` here'"); + }); + }); }); // ───────── buildSystemPrompt ───────── @@ -533,6 +651,42 @@ describe('buildSystemPrompt', () => { expect(result).toContain('--workItemId '); }); }); + + // MNG-1059: the cascade-tools shell-safety rules must be visible in the + // rendered system prompt so agents know to prefer --*-file paths and to + // avoid passing two stdin consumers in a single command. + describe('cascade-tools shell-safety rules (MNG-1059)', () => { + it('renders the shell-safety section header', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('### cascade-tools shell-safety rules'); + }); + + it('tells agents to prefer --*-file for markdown / multiline / backticks', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('--body-file'); + expect(result).toContain('markdown'); + expect(result).toContain('multiline'); + }); + + it('warns about the one-stdin-consumer invariant', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('Only **one**'); + expect(result).toContain('stdin (fd 0)'); + expect(result).toContain('drained once'); + }); + + it('shows the heredoc pattern for one payload', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('post-pr-comment'); + expect(result).toContain('--body-file -'); + }); + + it('shows the two-payload pattern (one temp file + one heredoc)', () => { + const result = buildSystemPrompt('Agent prompt.', []); + expect(result).toContain('create-pr-review'); + expect(result).toContain('--comments-file -'); + }); + }); }); // ───────── buildTaskPrompt ───────── diff --git a/tests/unit/backends/toolManifests.test.ts b/tests/unit/backends/toolManifests.test.ts index 473e4eb6a..d4112f1a2 100644 --- a/tests/unit/backends/toolManifests.test.ts +++ b/tests/unit/backends/toolManifests.test.ts @@ -167,8 +167,11 @@ describe('getToolManifests', () => { body: { type: 'string', required: true }, 'body-file': { type: 'string', - description: 'Read reply body from file (use - for stdin)', }, }); + const params = replyToReviewComment?.parameters as Record; + // MNG-1059: description now calls out markdown / multiline guidance. + expect(params['body-file'].description).toContain('Read reply body from file'); + expect(params['body-file'].description).toContain('markdown'); }); }); diff --git a/tests/unit/cli/file-input-flags.test.ts b/tests/unit/cli/file-input-flags.test.ts index 2acf1a951..a08d20155 100644 --- a/tests/unit/cli/file-input-flags.test.ts +++ b/tests/unit/cli/file-input-flags.test.ts @@ -301,6 +301,36 @@ describe('CreatePR --body-file', () => { process.env = originalEnv; }); + // MNG-1059: file inputs preserve shell-sensitive markdown exactly. Confirms + // the friction cluster (MNG-908 / MNG-910 / MNG-1046 / MNG-1048) does not + // regress — content with backticks, code fences, $() reaches the core fn + // byte-for-byte identical to what's on disk. + it('preserves backticks, code fences, $() and multiline content exactly', async () => { + const shellSensitive = [ + '## Summary', + '', + 'Use `npm test` to verify the fix.', + '', + '```bash', + 'echo "$(date)" > /tmp/now', + '```', + '', + 'Reproducer: `cascade-tools scm get-pr-diff --prNumber $(gh pr view --json number -q .number)`', + ].join('\n'); + const filePath = writeTempFile('shell-sensitive.md', shellSensitive); + const cmd = new CreatePR( + ['--title', 'feat: x', '--head', 'feat/x', '--body-file', filePath], + mockConfig as never, + ); + await cmd.run(); + + expect(createPR).toHaveBeenCalledWith( + expect.objectContaining({ + body: shellSensitive, + }), + ); + }); + it('reads PR body from file', async () => { const filePath = writeTempFile('pr-body.md', '## Summary\n\nPR description'); const cmd = new CreatePR( @@ -385,6 +415,32 @@ describe('CreatePRReview --body-file', () => { process.env = originalEnv; }); + // MNG-1059: stdin (fd 0) is single-consumer. Passing `--body-file - --comments-file -` + // silently drained one of the two payloads (whichever ran first won; the + // other got an empty string). Reject this combination structurally before + // any read occurs. + it('rejects --body-file - AND --comments-file - in a single invocation', async () => { + const cmd = new CreatePRReview( + ['--prNumber', '42', '--event', 'COMMENT', '--body-file', '-', '--comments-file', '-'], + mockConfig as never, + ); + const logSpy = vi.spyOn(cmd, 'log'); + await expect(cmd.run()).rejects.toThrow(); + + // The core function must not be invoked — guard runs pre-read. + expect(createPRReview).not.toHaveBeenCalled(); + + const output = JSON.parse(logSpy.mock.calls[0][0] as string) as { + success: boolean; + error: { type: string; flag?: string; message?: string; hint?: string }; + }; + expect(output.success).toBe(false); + expect(output.error.type).toBe('flag-parse'); + expect(output.error.flag).toBe('body-file,comments-file'); + expect(output.error.message).toContain('stdin can only be drained once'); + expect(output.error.hint).toContain('temp file'); + }); + it('reads review body from file', async () => { const filePath = writeTempFile('review.md', 'Review body from file'); const cmd = new CreatePRReview( diff --git a/tests/unit/cli/scm/scm-commands.test.ts b/tests/unit/cli/scm/scm-commands.test.ts index 19d7deaaf..f1800f064 100644 --- a/tests/unit/cli/scm/scm-commands.test.ts +++ b/tests/unit/cli/scm/scm-commands.test.ts @@ -141,7 +141,7 @@ describe('GetPRDiff command', () => { const cmd = new GetPRDiff(['--prNumber', '15'], makeMockConfig() as never); await cmd.run(); - expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, undefined); + expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, undefined, undefined); }); it('resolves owner/repo from env vars', async () => { @@ -150,7 +150,7 @@ describe('GetPRDiff command', () => { const cmd = new GetPRDiff(['--prNumber', '99'], makeMockConfig() as never); await cmd.run(); - expect(getPRDiff).toHaveBeenCalledWith('acme', 'webapp', 99, undefined); + expect(getPRDiff).toHaveBeenCalledWith('acme', 'webapp', 99, undefined, undefined); }); it('passes optional path to getPRDiff', async () => { @@ -160,7 +160,7 @@ describe('GetPRDiff command', () => { ); await cmd.run(); - expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, 'src/old.ts'); + expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, 'src/old.ts', undefined); }); it('outputs JSON success result', async () => { @@ -172,6 +172,43 @@ describe('GetPRDiff command', () => { const output = JSON.parse(logSpy.mock.calls[0][0] as string); expect(output.success).toBe(true); }); + + // MNG-1059: --output-file mode streams the raw multiline diff to disk and + // returns a compact summary, sidestepping stdout truncation on big diffs. + it('passes --output-file through to getPRDiff', async () => { + vi.mocked(getPRDiff).mockResolvedValue({ + outputFile: '/tmp/diff.md', + fileCount: 1, + bytes: 1234, + } as never); + const cmd = new GetPRDiff( + ['--prNumber', '15', '--outputFile', '/tmp/diff.md'], + makeMockConfig() as never, + ); + const logSpy = vi.spyOn(cmd, 'log'); + await cmd.run(); + + expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, undefined, '/tmp/diff.md'); + const output = JSON.parse(logSpy.mock.calls[0][0] as string); + expect(output.success).toBe(true); + expect(output.data).toEqual({ outputFile: '/tmp/diff.md', fileCount: 1, bytes: 1234 }); + }); + + it('passes --outputFile combined with --path to getPRDiff', async () => { + vi.mocked(getPRDiff).mockResolvedValue({ + outputFile: '/tmp/diff.md', + fileCount: 1, + bytes: 50, + pathFilter: 'src/big.json', + } as never); + const cmd = new GetPRDiff( + ['--prNumber', '15', '--path', 'src/big.json', '--outputFile', '/tmp/diff.md'], + makeMockConfig() as never, + ); + await cmd.run(); + + expect(getPRDiff).toHaveBeenCalledWith('owner', 'repo', 15, 'src/big.json', '/tmp/diff.md'); + }); }); // --------------------------------------------------------------------------- diff --git a/tests/unit/gadgets/github/core/getPRDiff.test.ts b/tests/unit/gadgets/github/core/getPRDiff.test.ts index 97e02c6b3..cbd5d149b 100644 --- a/tests/unit/gadgets/github/core/getPRDiff.test.ts +++ b/tests/unit/gadgets/github/core/getPRDiff.test.ts @@ -1,4 +1,8 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { mkdtempSync, readFileSync } from 'node:fs'; +import { rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; vi.mock('../../../../../src/github/client.js', () => ({ githubClient: { @@ -6,7 +10,12 @@ vi.mock('../../../../../src/github/client.js', () => ({ }, })); -import { getPRDiff } from '../../../../../src/gadgets/github/core/getPRDiff.js'; +import { + filterByPath, + formatPRDiffFile, + formatPRDiffPayload, + getPRDiff, +} from '../../../../../src/gadgets/github/core/getPRDiff.js'; import { githubClient } from '../../../../../src/github/client.js'; const mockGithub = vi.mocked(githubClient); @@ -118,3 +127,254 @@ describe('getPRDiff', () => { expect(result).toBe('Error fetching PR diff: API rate limit exceeded'); }); }); + +// --------------------------------------------------------------------------- +// MNG-1059: formatting helpers are pure and testable in isolation. +// --------------------------------------------------------------------------- + +describe('filterByPath', () => { + const files = [ + { + filename: 'src/new.ts', + previousFilename: 'src/old.ts', + status: 'renamed' as const, + additions: 1, + deletions: 1, + changes: 2, + }, + { + filename: 'src/other.ts', + status: 'modified' as const, + additions: 1, + deletions: 0, + changes: 1, + }, + ] as Awaited>; + + it('returns the full list when path is undefined', () => { + expect(filterByPath(files, undefined)).toHaveLength(2); + }); + + it('matches on current filename', () => { + const result = filterByPath(files, 'src/other.ts'); + expect(result).toHaveLength(1); + expect(result[0].filename).toBe('src/other.ts'); + }); + + it('matches on previousFilename for renames', () => { + const result = filterByPath(files, 'src/old.ts'); + expect(result).toHaveLength(1); + expect(result[0].filename).toBe('src/new.ts'); + }); + + it('returns empty when path does not match', () => { + expect(filterByPath(files, 'src/missing.ts')).toEqual([]); + }); +}); + +describe('formatPRDiffFile', () => { + it('renders header + status + diff fence when patch present', () => { + const file = { + filename: 'src/x.ts', + status: 'modified', + additions: 3, + deletions: 1, + patch: '@@ -1 +1 @@\n-old\n+new', + } as Awaited>[number]; + + const result = formatPRDiffFile(file); + expect(result).toContain('## src/x.ts'); + expect(result).toContain('Status: modified | +3 -1'); + expect(result).toContain('```diff'); + expect(result).toContain('@@ -1 +1 @@'); + }); + + it('renders previousFilename when present (rename)', () => { + const file = { + filename: 'src/new.ts', + previousFilename: 'src/old.ts', + status: 'renamed', + additions: 0, + deletions: 0, + patch: '@@ -1 +1 @@', + } as Awaited>[number]; + + const result = formatPRDiffFile(file); + expect(result).toContain('Previous filename: src/old.ts'); + }); + + it('renders binary/too-large marker when patch is missing', () => { + const file = { + filename: 'images/logo.png', + status: 'added', + additions: 0, + deletions: 0, + patch: undefined, + } as Awaited>[number]; + + const result = formatPRDiffFile(file); + expect(result).toContain('[Binary file or too large to display]'); + expect(result).not.toContain('```diff'); + }); +}); + +describe('formatPRDiffPayload', () => { + it('returns "No files changed" for empty list with no path', () => { + expect(formatPRDiffPayload([], undefined)).toBe('No files changed in this PR.'); + }); + + it('returns path-specific message for empty list with path filter', () => { + expect(formatPRDiffPayload([], 'src/missing.ts')).toBe( + 'No changed file matched path: src/missing.ts', + ); + }); + + it('prefixes with file count and joins sections with double newline', () => { + const files = [ + { + filename: 'a.ts', + status: 'modified', + additions: 1, + deletions: 0, + patch: '@@ -1 +1 @@\n+x', + }, + { + filename: 'b.ts', + status: 'added', + additions: 2, + deletions: 0, + patch: '@@ -0,0 +1,2 @@\n+a\n+b', + }, + ] as Awaited>; + + const result = formatPRDiffPayload(files, undefined); + expect(result.startsWith('2 file(s) changed:')).toBe(true); + expect(result).toContain('## a.ts'); + expect(result).toContain('## b.ts'); + }); +}); + +// --------------------------------------------------------------------------- +// MNG-1059: --output-file mode +// --------------------------------------------------------------------------- + +describe('getPRDiff with --output-file', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = mkdtempSync(join(tmpdir(), 'cascade-prdiff-output-test-')); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); + }); + + it('writes the full formatted diff to disk and returns compact summary', async () => { + mockGithub.getPRDiff.mockResolvedValue([ + { + filename: 'src/foo.ts', + status: 'modified', + additions: 5, + deletions: 2, + patch: '@@ -1,2 +1,5 @@\n-old\n+new\n+another', + }, + ] as Awaited>); + + const outputPath = join(tmpDir, 'diff.md'); + const result = await getPRDiff('owner', 'repo', 42, undefined, outputPath); + + // Returned value is the compact JSON summary, not the raw diff text. + expect(typeof result).toBe('object'); + const summary = result as { + outputFile: string; + fileCount: number; + bytes: number; + pathFilter?: string; + }; + expect(summary.outputFile).toBe(outputPath); + expect(summary.fileCount).toBe(1); + expect(summary.bytes).toBeGreaterThan(0); + expect(summary.pathFilter).toBeUndefined(); + + // The full multi-line diff is on disk. + const written = readFileSync(outputPath, 'utf-8'); + expect(written).toContain('## src/foo.ts'); + expect(written).toContain('@@ -1,2 +1,5 @@'); + expect(Buffer.byteLength(written, 'utf-8')).toBe(summary.bytes); + }); + + it('keeps stdout small even with a giant one-line JSON patch', async () => { + // Synthesize an absurdly large single-line JSON diff (the MNG-1045 case). + const bigPatch = `@@ -1,1 +1,1 @@\n-${'x'.repeat(50_000)}\n+${'y'.repeat(50_000)}`; + mockGithub.getPRDiff.mockResolvedValue([ + { + filename: 'huge.json', + status: 'modified', + additions: 1, + deletions: 1, + patch: bigPatch, + }, + ] as Awaited>); + + const outputPath = join(tmpDir, 'huge.md'); + const result = await getPRDiff('owner', 'repo', 42, undefined, outputPath); + + const summary = result as { outputFile: string; fileCount: number; bytes: number }; + // Summary stays tiny — only a handful of fields, well under 1KB + // regardless of how big the patch grows. + const summaryJsonSize = JSON.stringify(summary).length; + expect(summaryJsonSize).toBeLessThan(500); + + // But the actual file on disk holds the full payload (>100KB). + const written = readFileSync(outputPath, 'utf-8'); + expect(written.length).toBeGreaterThan(100_000); + expect(summary.bytes).toBeGreaterThan(100_000); + }); + + it('records pathFilter on the summary when --path is supplied', async () => { + mockGithub.getPRDiff.mockResolvedValue([ + { + filename: 'src/foo.ts', + status: 'modified', + additions: 1, + deletions: 1, + patch: '@@ -1 +1 @@', + }, + ] as Awaited>); + + const outputPath = join(tmpDir, 'diff.md'); + const result = await getPRDiff('owner', 'repo', 42, 'src/foo.ts', outputPath); + const summary = result as { pathFilter?: string }; + expect(summary.pathFilter).toBe('src/foo.ts'); + }); + + it('writes the no-match sentinel to disk when path filter produces 0 matches', async () => { + mockGithub.getPRDiff.mockResolvedValue([ + { + filename: 'src/other.ts', + status: 'modified', + additions: 1, + deletions: 0, + patch: '@@ -1 +1 @@', + }, + ] as Awaited>); + + const outputPath = join(tmpDir, 'diff.md'); + const result = await getPRDiff('owner', 'repo', 42, 'src/missing.ts', outputPath); + const summary = result as { fileCount: number; pathFilter?: string }; + expect(summary.fileCount).toBe(0); + expect(summary.pathFilter).toBe('src/missing.ts'); + const written = readFileSync(outputPath, 'utf-8'); + expect(written).toBe('No changed file matched path: src/missing.ts'); + }); + + it('throws on githubClient failure (output-file mode follows the throw convention)', async () => { + mockGithub.getPRDiff.mockRejectedValue(new Error('API rate limit')); + const outputPath = join(tmpDir, 'diff.md'); + + await expect(getPRDiff('owner', 'repo', 42, undefined, outputPath)).rejects.toThrow( + 'API rate limit', + ); + }); +}); diff --git a/tests/unit/gadgets/shared/cli/params.test.ts b/tests/unit/gadgets/shared/cli/params.test.ts index 2bf71ea54..db816efb7 100644 --- a/tests/unit/gadgets/shared/cli/params.test.ts +++ b/tests/unit/gadgets/shared/cli/params.test.ts @@ -6,7 +6,10 @@ import { Writable } from 'node:stream'; import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ErrorSink } from '../../../../../src/gadgets/shared/cli/errorSink.js'; -import { resolveDirectParams } from '../../../../../src/gadgets/shared/cli/params.js'; +import { + rejectMultipleStdinConsumers, + resolveDirectParams, +} from '../../../../../src/gadgets/shared/cli/params.js'; import type { CLIAutoResolved, FileInputAlternative, @@ -173,3 +176,132 @@ describe('CLI parameter resolution', () => { ).toThrow('exit'); }); }); + +// --------------------------------------------------------------------------- +// MNG-1059: multiple-stdin-consumer guard +// --------------------------------------------------------------------------- + +describe('rejectMultipleStdinConsumers (MNG-1059)', () => { + const fileAlts: FileInputAlternative[] = [ + { paramName: 'body', fileFlag: 'body-file' }, + { paramName: 'comments', fileFlag: 'comments-file', parseAs: 'json' }, + ]; + + it('no-ops when no file flags are set', () => { + const sink = makeSink(); + rejectMultipleStdinConsumers(fileAlts, { body: 'hello' }, sink); + expect(sink.exit).not.toHaveBeenCalled(); + }); + + it('no-ops when only one file flag is set to "-"', () => { + const sink = makeSink(); + rejectMultipleStdinConsumers( + fileAlts, + { 'body-file': '-', 'comments-file': '/tmp/comments.json' }, + sink, + ); + expect(sink.exit).not.toHaveBeenCalled(); + }); + + it('no-ops when both file flags resolve to real paths (not stdin)', () => { + const sink = makeSink(); + rejectMultipleStdinConsumers( + fileAlts, + { 'body-file': '/tmp/body.md', 'comments-file': '/tmp/comments.json' }, + sink, + ); + expect(sink.exit).not.toHaveBeenCalled(); + }); + + it('emits flag-parse envelope when two file flags are both set to "-"', () => { + const stdoutChunks: string[] = []; + const sink: ErrorSink = { + stdout: new Writable({ + write(chunk, _enc, cb) { + stdoutChunks.push(chunk.toString()); + cb(); + }, + }), + stderr: new Writable({ + write(_chunk, _enc, cb) { + cb(); + }, + }), + exit: vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }), + }; + + expect(() => + rejectMultipleStdinConsumers(fileAlts, { 'body-file': '-', 'comments-file': '-' }, sink), + ).toThrow('exit'); + + expect(sink.exit).toHaveBeenCalledWith(1); + const envelope = JSON.parse(stdoutChunks.join('').trim()) as { + success: boolean; + error: { type: string; flag?: string; message?: string; hint?: string }; + }; + expect(envelope.success).toBe(false); + expect(envelope.error.type).toBe('flag-parse'); + expect(envelope.error.flag).toBe('body-file,comments-file'); + expect(envelope.error.message).toContain('stdin can only be drained once'); + expect(envelope.error.hint).toContain('temp file'); + }); + + it('does not emit when only one of three file flags is "-"', () => { + const sink = makeSink(); + const alts: FileInputAlternative[] = [ + { paramName: 'body', fileFlag: 'body-file' }, + { paramName: 'comments', fileFlag: 'comments-file' }, + { paramName: 'description', fileFlag: 'description-file' }, + ]; + rejectMultipleStdinConsumers( + alts, + { + 'body-file': '-', + 'comments-file': '/tmp/c.json', + 'description-file': '/tmp/d.md', + }, + sink, + ); + expect(sink.exit).not.toHaveBeenCalled(); + }); + + it('emits when three file flags all set to "-"', () => { + const stdoutChunks: string[] = []; + const sink: ErrorSink = { + stdout: new Writable({ + write(chunk, _enc, cb) { + stdoutChunks.push(chunk.toString()); + cb(); + }, + }), + stderr: new Writable({ + write(_chunk, _enc, cb) { + cb(); + }, + }), + exit: vi.fn<(code: number) => never>(() => { + throw new Error('exit'); + }), + }; + const alts: FileInputAlternative[] = [ + { paramName: 'body', fileFlag: 'body-file' }, + { paramName: 'comments', fileFlag: 'comments-file' }, + { paramName: 'description', fileFlag: 'description-file' }, + ]; + + expect(() => + rejectMultipleStdinConsumers( + alts, + { 'body-file': '-', 'comments-file': '-', 'description-file': '-' }, + sink, + ), + ).toThrow('exit'); + + const envelope = JSON.parse(stdoutChunks.join('').trim()) as { + error: { flag?: string }; + }; + expect(envelope.error.flag).toBe('body-file,comments-file,description-file'); + }); +}); diff --git a/tests/unit/gadgets/shared/factories.test.ts b/tests/unit/gadgets/shared/factories.test.ts index c15951817..dbee069b7 100644 --- a/tests/unit/gadgets/shared/factories.test.ts +++ b/tests/unit/gadgets/shared/factories.test.ts @@ -1065,6 +1065,149 @@ describe('createCLICommand — spec 014 additions', () => { }); }); +// --------------------------------------------------------------------------- +// MNG-1059: multiple-stdin-consumer rejection at the factory level +// --------------------------------------------------------------------------- + +describe('createCLICommand — multiple stdin consumer rejection (MNG-1059)', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + const dualFileInputDef: ToolDefinition = { + name: 'TestReviewWithBoth', + description: 'Review a PR with both body and comments file inputs.', + parameters: { + body: { type: 'string', describe: 'Body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + cli: { + fileInputAlternatives: [ + { paramName: 'body', fileFlag: 'body-file' }, + { + paramName: 'comments', + fileFlag: 'comments-file', + parseAs: 'json', + }, + ], + }, + }; + + it('rejects --body-file - and --comments-file - with a flag-parse envelope', async () => { + const coreFn = vi.fn(); + const CommandClass = createCLICommand(dualFileInputDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: undefined, + comments: undefined, + 'body-file': '-', + 'comments-file': '-', + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + const logSpy = vi.spyOn(instance, 'log').mockImplementation(() => {}); + vi.spyOn(instance, 'exit').mockImplementation(() => { + throw new Error('exit'); + }); + + await expect(instance.execute()).rejects.toThrow('exit'); + + // Core function must NOT have been called — the guard runs before any + // stdin read, so neither payload gets consumed (and corrupted). + expect(coreFn).not.toHaveBeenCalled(); + + const logged = logSpy.mock.calls.map((c) => c[0]).join('\n'); + const jsonLine = logged.split('\n').find((l) => l.startsWith('{')) ?? ''; + const parsed = JSON.parse(jsonLine) as { + success: boolean; + error: { type: string; flag?: string; message?: string; hint?: string }; + }; + expect(parsed.success).toBe(false); + expect(parsed.error.type).toBe('flag-parse'); + expect(parsed.error.flag).toBe('body-file,comments-file'); + expect(parsed.error.message).toContain('stdin can only be drained once'); + expect(parsed.error.hint).toContain('temp file'); + }); + + it('allows --body-file - paired with --comments-file ', async () => { + mockReadFileSync.mockImplementation((target: unknown) => { + if (target === 0) return 'stdin body'; + if (target === '/tmp/comments.json') return '[{"path":"x.ts","line":1,"body":"nit"}]'; + throw new Error(`unexpected readFileSync target: ${String(target)}`); + }); + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + const CommandClass = createCLICommand(dualFileInputDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: undefined, + comments: undefined, + 'body-file': '-', + 'comments-file': '/tmp/comments.json', + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.body).toBe('stdin body'); + expect(capturedParams.comments).toEqual([{ path: 'x.ts', line: 1, body: 'nit' }]); + }); + + it('allows --body-file paired with --comments-file -', async () => { + mockReadFileSync.mockImplementation((target: unknown) => { + if (target === 0) return '[{"path":"x.ts","line":1,"body":"nit"}]'; + if (target === '/tmp/body.md') return 'review body from file'; + throw new Error(`unexpected readFileSync target: ${String(target)}`); + }); + let capturedParams: Record = {}; + const coreFn: CLICoreFn = async (params) => { + capturedParams = params as Record; + return 'ok'; + }; + const CommandClass = createCLICommand(dualFileInputDef, coreFn); + const instance = new CommandClass([], {}); + + vi.spyOn(instance, 'parse').mockResolvedValue({ + flags: { + body: undefined, + comments: undefined, + 'body-file': '/tmp/body.md', + 'comments-file': '-', + }, + args: {}, + argv: [], + raw: [], + } as unknown as Awaited>); + + vi.spyOn(instance, 'log').mockImplementation(() => {}); + + await instance.execute(); + + expect(capturedParams.body).toBe('review body from file'); + expect(capturedParams.comments).toEqual([{ path: 'x.ts', line: 1, body: 'nit' }]); + }); +}); + // --------------------------------------------------------------------------- // generateToolManifest tests // --------------------------------------------------------------------------- @@ -1407,3 +1550,97 @@ describe('generateToolManifest — widened fields (spec 014)', () => { expect(commentsParam).not.toHaveProperty('example'); }); }); + +// --------------------------------------------------------------------------- +// MNG-1059: manifest threads fileInputFor / fileInputAlternative cross-refs +// --------------------------------------------------------------------------- + +describe('generateToolManifest — file-input metadata threading (MNG-1059)', () => { + it('threads fileInputAlternative onto the direct text parameter when a file companion is declared', () => { + const def: ToolDefinition = { + name: 'PostPRComment', + description: 'Post a PR comment.', + parameters: { + prNumber: { type: 'number', describe: 'PR number', required: true }, + body: { type: 'string', describe: 'The comment body', required: true }, + }, + cli: { + fileInputAlternatives: [ + { + paramName: 'body', + fileFlag: 'body-file', + description: 'Read body from file (use - for stdin)', + }, + ], + }, + }; + + const manifest = generateToolManifest(def); + const bodyParam = manifest.parameters.body as { fileInputAlternative?: string }; + expect(bodyParam.fileInputAlternative).toBe('body-file'); + }); + + it('threads fileInputFor onto the synthesized --*-file flag entry', () => { + const def: ToolDefinition = { + name: 'PostPRComment', + description: 'Post a PR comment.', + parameters: { + body: { type: 'string', describe: 'The comment body', required: true }, + }, + cli: { + fileInputAlternatives: [{ paramName: 'body', fileFlag: 'body-file' }], + }, + }; + + const manifest = generateToolManifest(def); + const bodyFileParam = manifest.parameters['body-file'] as { fileInputFor?: string }; + expect(bodyFileParam.fileInputFor).toBe('body'); + }); + + it('does not attach fileInputAlternative when no file companion is declared', () => { + const def: ToolDefinition = { + name: 'Simple', + description: 'Simple tool.', + parameters: { + body: { type: 'string', describe: 'body', required: true }, + }, + }; + + const manifest = generateToolManifest(def); + const bodyParam = manifest.parameters.body as { fileInputAlternative?: string }; + expect(bodyParam.fileInputAlternative).toBeUndefined(); + }); + + it('handles multiple file companions in a single definition', () => { + const def: ToolDefinition = { + name: 'ReviewPR', + description: 'Review a PR.', + parameters: { + body: { type: 'string', describe: 'Review body', required: true }, + comments: { + type: 'array', + items: 'object', + describe: 'Inline comments', + optional: true, + }, + }, + cli: { + fileInputAlternatives: [ + { paramName: 'body', fileFlag: 'body-file' }, + { paramName: 'comments', fileFlag: 'comments-file', parseAs: 'json' }, + ], + }, + }; + + const manifest = generateToolManifest(def); + const bodyParam = manifest.parameters.body as { fileInputAlternative?: string }; + const commentsParam = manifest.parameters.comments as { fileInputAlternative?: string }; + const bodyFile = manifest.parameters['body-file'] as { fileInputFor?: string }; + const commentsFile = manifest.parameters['comments-file'] as { fileInputFor?: string }; + + expect(bodyParam.fileInputAlternative).toBe('body-file'); + expect(commentsParam.fileInputAlternative).toBe('comments-file'); + expect(bodyFile.fileInputFor).toBe('body'); + expect(commentsFile.fileInputFor).toBe('comments'); + }); +}); From ddbf4d04fe00d5058a374a3e57bb3fcd9f65585a Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Tue, 19 May 2026 09:22:42 +0000 Subject: [PATCH 3/4] fix: address feedback --- src/gadgets/github/core/getPRDiff.ts | 2 +- src/gadgets/github/definitions.ts | 3 +- src/gadgets/shared/gadgetFactory.ts | 19 +++++++---- src/gadgets/shared/toolDefinition.ts | 4 +-- tests/unit/cli/scm/scm-commands.test.ts | 6 ++-- .../gadgets/github/core/getPRDiff.test.ts | 6 ++-- tests/unit/gadgets/github/definitions.test.ts | 10 ++++++ tests/unit/gadgets/shared/factories.test.ts | 32 +++++++++++++++++++ 8 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/gadgets/github/core/getPRDiff.ts b/src/gadgets/github/core/getPRDiff.ts index cdaa37ba7..61af1da8e 100644 --- a/src/gadgets/github/core/getPRDiff.ts +++ b/src/gadgets/github/core/getPRDiff.ts @@ -49,7 +49,7 @@ export function formatPRDiffPayload(files: PRDiffFile[], path: string | undefine } /** - * MNG-1059: the compact JSON summary returned when `--output-file` is set. + * MNG-1059: the compact JSON summary returned when `--outputFile` is set. * Keeps the raw multi-megabyte diff text off stdout (terminal truncation, * agent context bloat) while preserving the cascade-tools JSON contract. */ diff --git a/src/gadgets/github/definitions.ts b/src/gadgets/github/definitions.ts index 5a456df77..d2b8df11a 100644 --- a/src/gadgets/github/definitions.ts +++ b/src/gadgets/github/definitions.ts @@ -304,7 +304,7 @@ export const getPRDetailsDef: ToolDefinition = { export const getPRDiffDef: ToolDefinition = { name: 'GetPRDiff', description: - 'Get the unified diff of all file changes in a GitHub pull request. Shows each file with additions, deletions, and the patch content. For large diffs or one-line JSON patches that would truncate stdout, use --output-file to write the full diff to disk and return a compact summary.', + 'Get the unified diff of all file changes in a GitHub pull request. Shows each file with additions, deletions, and the patch content. CLI/native-tool users can pass --outputFile to write large diffs or one-line JSON patches to disk and return a compact summary.', timeoutMs: 30000, parameters: { comment: { @@ -342,6 +342,7 @@ export const getPRDiffDef: ToolDefinition = { 'Optional path: when set, write the raw multiline diff text to this file and return a compact JSON summary {outputFile, fileCount, bytes, pathFilter} instead of the full payload. Use this for large diffs / one-line JSON patches that would truncate stdout.', optional: true, cliOnly: true, + cliAliases: ['output-file'], }, }, examples: [ diff --git a/src/gadgets/shared/gadgetFactory.ts b/src/gadgets/shared/gadgetFactory.ts index 647865e53..23caf0654 100644 --- a/src/gadgets/shared/gadgetFactory.ts +++ b/src/gadgets/shared/gadgetFactory.ts @@ -109,7 +109,7 @@ function buildZodField(def: ParameterDefinition) { * Convert a ParameterMap to a Zod object schema. * Gadget schemas include `gadgetOnly` params (like `comment`) but EXCLUDE * `cliOnly` params (CLI-only output destination flags such as - * `get-pr-diff --output-file`) — those have no meaningful in-process semantics. + * `get-pr-diff --outputFile`) — those have no meaningful in-process semantics. */ export function buildZodSchema(parameters: ParameterMap) { const shape: Record> = {}; @@ -153,13 +153,20 @@ export type GadgetCoreFn = Record paramDef.cliOnly) + .map(([name]) => name), + ); // Map ToolExample to GadgetExample - const examples: GadgetExample[] | undefined = def.examples?.map((ex) => ({ - params: ex.params as GadgetExampleParams, - output: ex.output, - comment: ex.comment, - })); + const examples: GadgetExample[] | undefined = def.examples + ?.filter((ex) => !Object.keys(ex.params).some((paramName) => cliOnlyParams.has(paramName))) + .map((ex) => ({ + params: ex.params as GadgetExampleParams, + output: ex.output, + comment: ex.comment, + })); const postExecute: GadgetPostExecuteHook | undefined = def.gadgetPostExecute; diff --git a/src/gadgets/shared/toolDefinition.ts b/src/gadgets/shared/toolDefinition.ts index 10efdab9f..68541e877 100644 --- a/src/gadgets/shared/toolDefinition.ts +++ b/src/gadgets/shared/toolDefinition.ts @@ -110,13 +110,13 @@ interface BaseParameterDefinition { gadgetOnly?: boolean; /** * If `true`, this parameter is exposed only via the CLI surface (e.g. output - * destination flags like `--output-file` that have no in-process equivalent + * destination flags like `--outputFile` that have no in-process equivalent * for an SDK gadget). The flag is included in the CLI, in the agent-facing * tool manifest (so the prompt shows it), and in oclif help output — but is * EXCLUDED from the Zod schema the SDK Gadget exposes. * * Mutually exclusive with `gadgetOnly`. Used by MNG-1059 for the - * `get-pr-diff --output-file ` escape hatch. + * `get-pr-diff --outputFile ` escape hatch. */ cliOnly?: boolean; /** diff --git a/tests/unit/cli/scm/scm-commands.test.ts b/tests/unit/cli/scm/scm-commands.test.ts index f1800f064..807e59ea7 100644 --- a/tests/unit/cli/scm/scm-commands.test.ts +++ b/tests/unit/cli/scm/scm-commands.test.ts @@ -173,16 +173,16 @@ describe('GetPRDiff command', () => { expect(output.success).toBe(true); }); - // MNG-1059: --output-file mode streams the raw multiline diff to disk and + // MNG-1059: --outputFile mode streams the raw multiline diff to disk and // returns a compact summary, sidestepping stdout truncation on big diffs. - it('passes --output-file through to getPRDiff', async () => { + it('passes --output-file alias through to getPRDiff', async () => { vi.mocked(getPRDiff).mockResolvedValue({ outputFile: '/tmp/diff.md', fileCount: 1, bytes: 1234, } as never); const cmd = new GetPRDiff( - ['--prNumber', '15', '--outputFile', '/tmp/diff.md'], + ['--prNumber', '15', '--output-file', '/tmp/diff.md'], makeMockConfig() as never, ); const logSpy = vi.spyOn(cmd, 'log'); diff --git a/tests/unit/gadgets/github/core/getPRDiff.test.ts b/tests/unit/gadgets/github/core/getPRDiff.test.ts index cbd5d149b..754c6006d 100644 --- a/tests/unit/gadgets/github/core/getPRDiff.test.ts +++ b/tests/unit/gadgets/github/core/getPRDiff.test.ts @@ -255,10 +255,10 @@ describe('formatPRDiffPayload', () => { }); // --------------------------------------------------------------------------- -// MNG-1059: --output-file mode +// MNG-1059: --outputFile mode // --------------------------------------------------------------------------- -describe('getPRDiff with --output-file', () => { +describe('getPRDiff with --outputFile', () => { let tmpDir: string; beforeEach(() => { @@ -369,7 +369,7 @@ describe('getPRDiff with --output-file', () => { expect(written).toBe('No changed file matched path: src/missing.ts'); }); - it('throws on githubClient failure (output-file mode follows the throw convention)', async () => { + it('throws on githubClient failure (outputFile mode follows the throw convention)', async () => { mockGithub.getPRDiff.mockRejectedValue(new Error('API rate limit')); const outputPath = join(tmpDir, 'diff.md'); diff --git a/tests/unit/gadgets/github/definitions.test.ts b/tests/unit/gadgets/github/definitions.test.ts index f5b1775f7..11f55423b 100644 --- a/tests/unit/gadgets/github/definitions.test.ts +++ b/tests/unit/gadgets/github/definitions.test.ts @@ -301,6 +301,16 @@ describe('getPRDiffDef', () => { expect(getPRDiffDef.parameters.path?.required).toBeUndefined(); }); + it('outputFile is CLI-only and accepts the kebab-case alias', () => { + const outputFile = getPRDiffDef.parameters.outputFile as { + cliOnly?: boolean; + cliAliases?: readonly string[]; + }; + + expect(outputFile.cliOnly).toBe(true); + expect(outputFile.cliAliases).toEqual(['output-file']); + }); + it('generated schema accepts a call without path (full-PR behavior unchanged)', () => { const schema = buildZodSchema(getPRDiffDef.parameters); const result = schema.safeParse({ diff --git a/tests/unit/gadgets/shared/factories.test.ts b/tests/unit/gadgets/shared/factories.test.ts index dbee069b7..4da31dbce 100644 --- a/tests/unit/gadgets/shared/factories.test.ts +++ b/tests/unit/gadgets/shared/factories.test.ts @@ -365,6 +365,38 @@ describe('createGadgetClass', () => { expect(instance.examples?.[0]?.comment).toBe('Basic usage'); }); + it('filters examples that use CLI-only params out of SDK gadget examples', () => { + const coreFn: GadgetCoreFn = async () => 'ok'; + const def: ToolDefinition = { + name: 'CliOnlyExampleTool', + description: 'A tool with a CLI-only example', + parameters: { + value: { type: 'string', describe: 'A value', required: true }, + outputFile: { + type: 'string', + describe: 'Write output to a file', + optional: true, + cliOnly: true, + }, + }, + examples: [ + { + params: { value: 'visible' }, + comment: 'SDK-safe example', + }, + { + params: { value: 'cli', outputFile: '/tmp/out.txt' }, + comment: 'CLI-only example', + }, + ], + }; + const GadgetClass = createGadgetClass(def, coreFn); + + const instance = new GadgetClass(); + expect(instance.examples).toHaveLength(1); + expect(instance.examples?.[0]?.params).toEqual({ value: 'visible' }); + }); + it('handles definition with no examples', () => { const coreFn: GadgetCoreFn = async () => 'ok'; const noExamplesDef: ToolDefinition = { From 185ecb015a41b9d7530a99279066cb4b5842ebfb Mon Sep 17 00:00:00 2001 From: Cascade Bot Date: Tue, 19 May 2026 09:41:07 +0000 Subject: [PATCH 4/4] fix: address feedback --- CHANGELOG.md | 2 +- CLAUDE.md | 2 +- src/agents/prompts/templates/review.eta | 2 +- src/agents/shared/prFormatting.ts | 4 ++-- tests/unit/agents/shared/prFormatting.test.ts | 14 ++++++++++++++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4916475c8..d8facad0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ All notable user-visible changes to CASCADE are documented here. The format is l - **`cascade-tools` multiline text and large diff I/O are now hardened against shell-quoting footguns and stdout truncation** ([MNG-1059](https://linear.app/issue/MNG-1059)). The shared CLI factory at `src/gadgets/shared/cli/params.ts` now rejects invocations that pass `--*-file -` for two or more file-input flags (e.g. `--body-file - --comments-file -`) before any `readFileSync(0, ...)` call — stdin (fd 0) can only be drained once per process, and the previous behavior silently truncated one of the two agent payloads. The rejection emits a structured `flag-parse` error envelope (`error.flag: "body-file,comments-file"`, `hint: "Pass at most one --*-file -; for the others, write the payload to a temp file and pass ---file ."`) so agents can self-correct on the next attempt. Direct file paths remain pairwise-compatible — `--body-file - --comments-file /tmp/comments.json` and `--body-file /tmp/body.md --comments-file -` both work as before. The native-tool system prompt now renders a "cascade-tools shell-safety rules" section that documents the one-stdin-consumer invariant and provides safe heredoc / temp-file patterns for one and two payloads. The prompt renderer also suppresses inline `--body '...'` / `--text '...'` examples whose content contains backticks, code fences, `$(...)`, or newlines when a file-input companion is declared, redirecting the agent at the safer `--*-file ` form instead. File-input flag descriptions for `--body-file`, `--text-file`, `--description-file`, `--details-file`, and `--comments-file` explicitly call out markdown / multiline / backticks. Closes [MNG-908](https://linear.app/mongrel/issue/MNG-908), [MNG-910](https://linear.app/mongrel/issue/MNG-910), [MNG-917](https://linear.app/mongrel/issue/MNG-917), [MNG-1046](https://linear.app/mongrel/issue/MNG-1046). -- **`cascade-tools scm get-pr-diff` gains an `--outputFile ` escape hatch for large diffs and one-line JSON patches** ([MNG-1045](https://linear.app/mongrel/issue/MNG-1045)). When `--outputFile` is set, the full multiline Markdown diff is written to the requested path on disk and stdout returns a compact JSON summary `{outputFile, fileCount, bytes, pathFilter}` instead of the raw payload — sidestepping terminal-truncation issues with hundreds-of-kilobytes one-line JSON diffs. Default behavior is preserved: without `--outputFile`, `get-pr-diff` returns the formatted Markdown directly. The review-agent skipped-files guidance now points operators at this form (`cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/diff-.md`) when a file would otherwise truncate. The `--outputFile` flag is declared as `cliOnly: true` on the underlying ToolDefinition so it appears in the CLI + agent-facing manifest but is excluded from the SDK Gadget Zod schema (gadgets return strings in-process and cannot deliver a file path back through that contract). +- **`cascade-tools scm get-pr-diff` gains an `--outputFile ` escape hatch for large diffs and one-line JSON patches** ([MNG-1045](https://linear.app/mongrel/issue/MNG-1045)). When `--outputFile` is set, the full multiline Markdown diff is written to the requested path on disk and stdout returns a compact JSON summary `{outputFile, fileCount, bytes, pathFilter}` instead of the raw payload — sidestepping terminal-truncation issues with hundreds-of-kilobytes one-line JSON diffs. Default behavior is preserved: without `--outputFile`, `get-pr-diff` returns the formatted Markdown directly. The review-agent skipped-files guidance now points operators at this form (`cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/pr-diff.md`) when a file would otherwise truncate. The `--outputFile` flag is declared as `cliOnly: true` on the underlying ToolDefinition so it appears in the CLI + agent-facing manifest but is excluded from the SDK Gadget Zod schema (gadgets return strings in-process and cannot deliver a file path back through that contract). ### Changed diff --git a/CLAUDE.md b/CLAUDE.md index d3fff389b..5e0b16e70 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -148,7 +148,7 @@ The wedged-lock canary should never fire under normal operation. Its presence in Review agent receives a **compact per-file diff context**, not full file contents. Each changed file is a `### (, +N -M)` section with a unified diff hunk. Budget: `REVIEW_DIFF_CONTEXT_TOKEN_LIMIT` = 200k tokens, per-file cap 10%. -GitHub's changed-file API is used for file enumeration and change counts, but compact patch bodies come from the checked-out PR workspace via `git diff origin/...HEAD`. Files that can't fit or can't be locally verified (deleted, binary/no text patch, local diff failure/empty patch, oversized patch, or budget exhausted) are injected as `SKIPPED FILES` with instructions to fetch on demand via `cascade-tools scm get-pr-diff --prNumber --path `, `Read`, or `Grep`. Large or one-line JSON diffs that would truncate stdout should add `--outputFile /tmp/diff-.md` — the CLI writes the full multiline Markdown payload to disk and returns a compact `{outputFile, fileCount, bytes, pathFilter}` summary on stdout (MNG-1059 / MNG-1045). +GitHub's changed-file API is used for file enumeration and change counts, but compact patch bodies come from the checked-out PR workspace via `git diff origin/...HEAD`. Files that can't fit or can't be locally verified (deleted, binary/no text patch, local diff failure/empty patch, oversized patch, or budget exhausted) are injected as `SKIPPED FILES` with instructions to fetch on demand via `cascade-tools scm get-pr-diff --prNumber --path `, `Read`, or `Grep`. Large or one-line JSON diffs that would truncate stdout should add `--outputFile /tmp/pr-diff.md` — the CLI writes the full multiline Markdown payload to disk and returns a compact `{outputFile, fileCount, bytes, pathFilter}` summary on stdout (MNG-1059 / MNG-1045). When review output misses something, check the `PR context prepared` log entry for `included` / `skipped` / `skipReasons`, `patchSources`, `totalDiffTokens`, `perFileTokenCap`, and `localGitMismatches` to confirm whether the file was visible to the agent and whether GitHub's API patch differed from the local patch. Also check context offload logs if the diff context was written under `.cascade/context/`. diff --git a/src/agents/prompts/templates/review.eta b/src/agents/prompts/templates/review.eta index e89b818f8..737ea66e6 100644 --- a/src/agents/prompts/templates/review.eta +++ b/src/agents/prompts/templates/review.eta @@ -198,7 +198,7 @@ Use CreatePRReview with: - `body`: Summary of findings (or "LGTM" if no issues) - `comments`: Inline comments for specific issues -**CRITICAL**: Inline comments can ONLY be placed on files in the PR diff. If you comment on a file path not in the diff, or use line numbers outside the changed ranges, the entire review will fail. Only add inline comments for files you saw in GetPRDiffContext output or files you verified by fetching a skipped file on demand via `cascade-tools scm get-pr-diff --path`. For very large or one-line JSON diffs that would truncate stdout, add `--outputFile /tmp/diff-.md` and then `Read` that file — the CLI returns a compact summary while the full diff stays on disk. +**CRITICAL**: Inline comments can ONLY be placed on files in the PR diff. If you comment on a file path not in the diff, or use line numbers outside the changed ranges, the entire review will fail. Only add inline comments for files you saw in GetPRDiffContext output or files you verified by fetching a skipped file on demand via `cascade-tools scm get-pr-diff --path`. For very large or one-line JSON diffs that would truncate stdout, add `--outputFile /tmp/pr-diff.md` and then `Read` that file — the CLI returns a compact summary while the full diff stays on disk. **Submitting markdown review bodies with backticks, code fences, or multi-line content**: pass `--body-file ` (or `--body-file -` heredoc) instead of inline `--body '...'` — shells expand backticks and `$(...)` even inside quotes, and newlines fight argv parsing. If you also need to pass `--comments` JSON, write the body to a temp file and stream the comments via stdin: `--body-file /tmp/body.md --comments-file -` (only one `--*-file -` per command — stdin can only be drained once). diff --git a/src/agents/shared/prFormatting.ts b/src/agents/shared/prFormatting.ts index dd20af220..56add1246 100644 --- a/src/agents/shared/prFormatting.ts +++ b/src/agents/shared/prFormatting.ts @@ -259,8 +259,8 @@ export function formatSkippedFilesInjection( // the agent about the file-output escape hatch so it does not lose // content to terminal truncation. prNumber !== undefined - ? ` • \`cascade-tools scm get-pr-diff --prNumber ${prNumber} --path --outputFile /tmp/diff-.md\` for large or one-line JSON diffs, then \`Read /tmp/diff-.md\`` - : ' • `cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/diff-.md` for large or one-line JSON diffs, then `Read /tmp/diff-.md`', + ? ` • \`cascade-tools scm get-pr-diff --prNumber ${prNumber} --path --outputFile /tmp/pr-diff.md\` for large or one-line JSON diffs, then \`Read /tmp/pr-diff.md\`` + : ' • `cascade-tools scm get-pr-diff --prNumber --path --outputFile /tmp/pr-diff.md` for large or one-line JSON diffs, then `Read /tmp/pr-diff.md`', ' • `Read ` to read the post-PR file content', ' • `Grep ` to search inside the file', '', diff --git a/tests/unit/agents/shared/prFormatting.test.ts b/tests/unit/agents/shared/prFormatting.test.ts index 65bcbb460..ea4ff56d6 100644 --- a/tests/unit/agents/shared/prFormatting.test.ts +++ b/tests/unit/agents/shared/prFormatting.test.ts @@ -6,6 +6,7 @@ import { formatPRDiff, formatPRIssueComments, formatPRReviews, + formatSkippedFilesInjection, type PRDiff, type SkippedFile, } from '../../../../src/agents/shared/prFormatting.js'; @@ -289,6 +290,19 @@ describe('formatPRIssueComments', () => { }); }); +describe('formatSkippedFilesInjection', () => { + it('uses a slash-safe output file path for large skipped diffs', () => { + const result = formatSkippedFilesInjection( + [{ filename: 'src/big.json', reason: 'patch-too-large' }], + 1092, + ); + + expect(result).toContain('--path --outputFile /tmp/pr-diff.md'); + expect(result).toContain('Read /tmp/pr-diff.md'); + expect(result).not.toContain('/tmp/diff-.md'); + }); +}); + // ============================================================================ // extractPRDiffs // ============================================================================