From 764c89b4bd2aadd0bb8f47338e95f2e875618806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 17:51:29 +0200 Subject: [PATCH 01/17] refactor: replace custom qa runner with cali --- .eas/workflows/agent-qa-mobile.yml | 11 - README.md | 38 +- cali.config.json | 10 + package-lock.json | 327 +++++-- package.json | 10 +- scripts/agent-qa/index.ts | 946 -------------------- scripts/agent-qa/package.json | 3 - scripts/agent-qa/provision-ios-simulator.sh | 13 - scripts/agent-qa/run-and-export.sh | 86 +- 9 files changed, 353 insertions(+), 1091 deletions(-) create mode 100644 cali.config.json delete mode 100644 scripts/agent-qa/index.ts delete mode 100644 scripts/agent-qa/package.json delete mode 100644 scripts/agent-qa/provision-ios-simulator.sh diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index c1955a8..31ee18e 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -116,10 +116,6 @@ jobs: run: | bash ./scripts/agent-qa/provision-android-emulator.sh - id: run_agent_qa - env: - AGENT_DEVICE_SESSION: qa-android - AGENT_DEVICE_PLATFORM: android - AGENT_DEVICE_SESSION_LOCK: strip run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" @@ -156,14 +152,7 @@ jobs: with: build_id: ${{ env.BUILD_ID }} extensions: [app] - - id: provision_ios_simulator - run: | - bash ./scripts/agent-qa/provision-ios-simulator.sh - id: run_agent_qa - env: - AGENT_DEVICE_SESSION: qa-ios - AGENT_DEVICE_PLATFORM: ios - AGENT_DEVICE_SESSION_LOCK: strip run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" diff --git a/README.md b/README.md index 2604ae3..cf7f4f0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # EAS agent-device demo -This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows. +This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows with [`cali`](https://github.com/callstackincubator/cali). ## What it does - Reuses compatible Android and iOS simulator builds with `fingerprint` + `get-build` + `repack` - Falls back to a fresh `build` when the fingerprint changes -- Runs a small Node.js QA agent built with the AI SDK `ToolLoopAgent` +- Uses `cali qa` as the mobile QA agent runtime - Uses `agent-device` to drive the Android app and iOS simulator, take screenshots, and summarize findings - Posts one combined mobile QA summary back to the GitHub pull request with `github-comment` - Optionally uploads screenshots to Vercel Blob so the PR comment can link them @@ -17,7 +17,8 @@ This repo is a minimal Expo + CNG example for running AI-assisted Android and iO - [eas.json](./eas.json) - [.eas/workflows/agent-qa-mobile.yml](./.eas/workflows/agent-qa-mobile.yml) -- [scripts/agent-qa/index.ts](./scripts/agent-qa/index.ts) +- [cali.config.json](./cali.config.json) +- [scripts/agent-qa/run-and-export.sh](./scripts/agent-qa/run-and-export.sh) ## Required setup @@ -38,34 +39,33 @@ Optional environment variables for the QA job: ```bash npm install -npx tsc --noEmit +npx cali qa --help ``` -The workflow runner writes `section.md`, `status.txt`, and `report.json` to `artifacts/qa/` during execution. Temporary screenshots are written outside the workspace and uploaded to Vercel Blob when configured. +The workflow runner writes `section.md`, `status.txt`, `report.json`, and `cali-context.json` to `artifacts/qa/` during execution. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. -To execute the runner directly with Node 24, provide the same environment variables the workflow sets: +To execute the QA command directly, provide the same inputs that the workflow uses: Android: ```bash AI_GATEWAY_API_KEY=... \ -QA_PLATFORM=android \ -APP_PATH=/absolute/path/to/app.apk \ -APPLICATION_ID=dev.expo.easagentdevice \ -BUILD_ID=test-build \ -PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \ -node ./scripts/agent-qa/index.ts +./node_modules/.bin/cali qa \ + --env local-android \ + --artifact /absolute/path/to/app.apk \ + --app-id dev.expo.easagentdevice \ + --device ci-android \ + --prompt "verify the updated welcome title" ``` iOS simulator: ```bash AI_GATEWAY_API_KEY=... \ -QA_PLATFORM=ios \ -APP_PATH=/absolute/path/to/MyApp.app \ -APPLICATION_ID=dev.expo.easagentdevice \ -AGENT_DEVICE_IOS_DEVICE="iPhone 17" \ -BUILD_ID=test-build \ -PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \ -node ./scripts/agent-qa/index.ts +./node_modules/.bin/cali qa \ + --env local-ios \ + --artifact /absolute/path/to/MyApp.app \ + --app-id dev.expo.easagentdevice \ + --device "iPhone 17" \ + --prompt "verify the updated welcome title" ``` diff --git a/cali.config.json b/cali.config.json new file mode 100644 index 0000000..e9f2afb --- /dev/null +++ b/cali.config.json @@ -0,0 +1,10 @@ +{ + "commands": { + "qa": { + "extraInstructions": [ + "When you need to verify whether text is visible on screen, prefer `snapshot` over `snapshot -i`. Use `snapshot -i` mainly for interactive exploration and choosing refs.", + "When you save screenshots, use short descriptive file names and include matching screenshotLabels so downstream PR comments can label them clearly." + ] + } + } +} diff --git a/package-lock.json b/package-lock.json index 7316351..705039e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "@react-navigation/bottom-tabs": "^7.4.0", "@react-navigation/elements": "^2.6.3", "@react-navigation/native": "^7.1.8", - "@vercel/blob": "^2.3.1", "expo": "^55.0.8", "expo-constants": "~55.0.9", "expo-font": "~55.0.4", @@ -37,22 +36,38 @@ }, "devDependencies": { "@types/react": "~19.2.10", - "agent-device": "^0.10.1", - "ai": "^6.0.116", + "cali": "0.4.0-0", "eslint": "^9.25.0", "eslint-config-expo": "~55.0.0", "typescript": "~5.9.2" } }, + "node_modules/@ai-sdk/anthropic": { + "version": "3.0.68", + "resolved": "https://registry.npmjs.org/@ai-sdk/anthropic/-/anthropic-3.0.68.tgz", + "integrity": "sha512-BAd+fmgYoJMmGw0/uV+jRlXX60PyGxelA6Clp4cK/NI0dsyv9jOOwzQmKNaz2nwb+Jz7HqI7I70KK4XtU5EcXQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider": "3.0.8", + "@ai-sdk/provider-utils": "4.0.23" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4.1.8" + } + }, "node_modules/@ai-sdk/gateway": { - "version": "3.0.66", - "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.66.tgz", - "integrity": "sha512-SIQ0YY0iMuv+07HLsZ+bB990zUJ6S4ujORAh+Jv1V2KGNn73qQKnGO0JBk+w+Res8YqOFSycwDoWcFlQrVxS4A==", + "version": "3.0.93", + "resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-3.0.93.tgz", + "integrity": "sha512-8D6C9eEvDq6IgrdlWzpbniahDkoLiieTCrpzH8p/Hw63/0iPnZJ1uZcqxHrDIVDW/+aaGhBXqmx5C7HSd2eMmQ==", "dev": true, "license": "Apache-2.0", "dependencies": { "@ai-sdk/provider": "3.0.8", - "@ai-sdk/provider-utils": "4.0.19", + "@ai-sdk/provider-utils": "4.0.23", "@vercel/oidc": "3.1.0" }, "engines": { @@ -76,9 +91,9 @@ } }, "node_modules/@ai-sdk/provider-utils": { - "version": "4.0.19", - "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.19.tgz", - "integrity": "sha512-3eG55CrSWCu2SXlqq2QCsFjo3+E7+Gmg7i/oRVoSZzIodTuDSfLb3MRje67xE9RFea73Zao7Lm4mADIfUETKGg==", + "version": "4.0.23", + "resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-4.0.23.tgz", + "integrity": "sha512-z8GlDaCmRSDlqkMF2f4/RFgWxdarvIbyuk+m6WXT1LYgsnGiXRJGTD2Z1+SDl3LqtFuRtGX1aghYvQLoHL/9pg==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -2219,6 +2234,16 @@ "excpretty": "build/cli.js" } }, + "node_modules/@fastify/busboy": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", + "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -3278,6 +3303,13 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "license": "MIT" }, + "node_modules/@types/tinycolor2": { + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@types/tinycolor2/-/tinycolor2-1.4.6.tgz", + "integrity": "sha512-iEN8J0BoMnsWBqjVbWH/c0G0Hh7O21lpR2/+PrvAVgWdzL7eexIFm4JN/Wn10PTcmNdtS6U67r499mlWMXOxNw==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/yargs": { "version": "17.0.35", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.35.tgz", @@ -3864,22 +3896,6 @@ "win32" ] }, - "node_modules/@vercel/blob": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/@vercel/blob/-/blob-2.3.1.tgz", - "integrity": "sha512-6f9oWC+DbWxIgBLOdqjjn2/REpFrPDB7y5B5HA1ptYkzZaBgL6E34kWrptJvJ7teApJdbAs3I1a5A7z1y8SDHw==", - "license": "Apache-2.0", - "dependencies": { - "async-retry": "^1.3.3", - "is-buffer": "^2.0.5", - "is-node-process": "^1.2.0", - "throttleit": "^2.1.0", - "undici": "^6.23.0" - }, - "engines": { - "node": ">=20.0.0" - } - }, "node_modules/@vercel/oidc": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/@vercel/oidc/-/oidc-3.1.0.tgz", @@ -3956,42 +3972,16 @@ "node": ">= 14" } }, - "node_modules/agent-device": { - "version": "0.10.1", - "resolved": "https://registry.npmjs.org/agent-device/-/agent-device-0.10.1.tgz", - "integrity": "sha512-3k7yoXE4yVtTL0qokeurOtnO3W9bgMcQl88QUtp9dkSqiOHIczm8e0ynfHn1fMtKiV6N+vjdOcu06Z/ZPpBvFw==", - "dev": true, - "license": "MIT", - "dependencies": { - "pngjs": "^7.0.0" - }, - "bin": { - "agent-device": "bin/agent-device.mjs" - }, - "engines": { - "node": ">=22" - } - }, - "node_modules/agent-device/node_modules/pngjs": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz", - "integrity": "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.19.0" - } - }, "node_modules/ai": { - "version": "6.0.116", - "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.116.tgz", - "integrity": "sha512-7yM+cTmyRLeNIXwt4Vj+mrrJgVQ9RMIW5WO0ydoLoYkewIvsMcvUmqS4j2RJTUXaF1HphwmSKUMQ/HypNRGOmA==", + "version": "6.0.153", + "resolved": "https://registry.npmjs.org/ai/-/ai-6.0.153.tgz", + "integrity": "sha512-UlgBe4k0Ja1m1Eufn6FVSsHoF0sc7qwxX35ywJPDogIvBz0pHc+NOmCqiRY904DczNYIuwpZfKBLVz8HXgu3mg==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@ai-sdk/gateway": "3.0.66", + "@ai-sdk/gateway": "3.0.93", "@ai-sdk/provider": "3.0.8", - "@ai-sdk/provider-utils": "4.0.19", + "@ai-sdk/provider-utils": "4.0.23", "@opentelemetry/api": "1.9.0" }, "engines": { @@ -4292,6 +4282,7 @@ "version": "1.3.3", "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", + "dev": true, "license": "MIT", "dependencies": { "retry": "0.13.1" @@ -4728,6 +4719,69 @@ "node": ">= 0.8" } }, + "node_modules/cac": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/cac/-/cac-7.0.0.tgz", + "integrity": "sha512-tixWYgm5ZoOD+3g6UTea91eow5z6AAHaho3g0V9CNSNb45gM8SmflpAc+GRd1InC4AqN/07Unrgp56Y94N9hJQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/cali": { + "version": "0.4.0-0", + "resolved": "https://registry.npmjs.org/cali/-/cali-0.4.0-0.tgz", + "integrity": "sha512-xr6oQOb3k6dgXqkJZ9v3EXk8Ko4jZq9OqmOvAbNt36r1uA6qBitgjhmzfiESXow3mmqP4BE2WPhCPgx+5aCiPA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@ai-sdk/anthropic": "^3.0.64", + "@vercel/blob": "^0.27.0", + "ai": "^6.0.138", + "cac": "^7.0.0", + "cosmiconfig": "^9.0.1", + "dotenv": "^16.4.5", + "gradient-string": "^3.0.0", + "zod": "^4.3.6" + }, + "bin": { + "cali": "dist/index.js" + }, + "engines": { + "node": ">=22" + } + }, + "node_modules/cali/node_modules/@vercel/blob": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@vercel/blob/-/blob-0.27.3.tgz", + "integrity": "sha512-WizeAxzOTmv0JL7wOaxvLIU/KdBcrclM1ZUOdSlIZAxsTTTe1jsyBthStLby0Ueh7FnmKYAjLz26qRJTk5SDkQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "async-retry": "^1.3.3", + "is-buffer": "^2.0.5", + "is-node-process": "^1.2.0", + "throttleit": "^2.1.0", + "undici": "^5.28.4" + }, + "engines": { + "node": ">=16.14" + } + }, + "node_modules/cali/node_modules/undici": { + "version": "5.29.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.29.0.tgz", + "integrity": "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@fastify/busboy": "^2.0.0" + }, + "engines": { + "node": ">=14.0" + } + }, "node_modules/call-bind": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", @@ -5086,6 +5140,33 @@ "url": "https://opencollective.com/core-js" } }, + "node_modules/cosmiconfig": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", + "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "env-paths": "^2.2.1", + "import-fresh": "^3.3.0", + "js-yaml": "^4.1.0", + "parse-json": "^5.2.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/d-fischer" + }, + "peerDependencies": { + "typescript": ">=4.9.5" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, "node_modules/cross-fetch": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz", @@ -5331,6 +5412,19 @@ "node": ">=0.10.0" } }, + "node_modules/dotenv": { + "version": "16.6.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", + "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -5373,6 +5467,33 @@ "node": ">= 0.8" } }, + "node_modules/env-paths": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", + "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/error-ex": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", + "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/error-ex/node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true, + "license": "MIT" + }, "node_modules/error-stack-parser": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/error-stack-parser/-/error-stack-parser-2.1.4.tgz", @@ -5790,6 +5911,7 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -7282,6 +7404,33 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "license": "ISC" }, + "node_modules/gradient-string": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/gradient-string/-/gradient-string-3.0.0.tgz", + "integrity": "sha512-frdKI4Qi8Ihp4C6wZNB565de/THpIaw3DjP5ku87M+N9rNSGmPTjfkq61SdRXB7eCaL8O1hkKDvf6CDMtOzIAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "^5.3.0", + "tinygradient": "^1.1.5" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gradient-string/node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, "node_modules/has-bigints": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz", @@ -7667,6 +7816,7 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "dev": true, "funding": [ { "type": "github", @@ -7885,6 +8035,7 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz", "integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==", + "dev": true, "license": "MIT" }, "node_modules/is-number": { @@ -8348,6 +8499,13 @@ "dev": true, "license": "MIT" }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true, + "license": "MIT" + }, "node_modules/json-schema": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", @@ -8722,6 +8880,13 @@ "url": "https://opencollective.com/parcel" } }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true, + "license": "MIT" + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -9820,6 +9985,25 @@ "node": ">=6" } }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/parse-png": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/parse-png/-/parse-png-2.1.0.tgz", @@ -10820,6 +11004,7 @@ "version": "0.13.1", "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "dev": true, "license": "MIT", "engines": { "node": ">= 4" @@ -11724,6 +11909,7 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz", "integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==", + "dev": true, "license": "MIT", "engines": { "node": ">=18" @@ -11732,6 +11918,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/tinycolor2": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", + "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", + "dev": true, + "license": "MIT" + }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -11781,6 +11974,17 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/tinygradient": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/tinygradient/-/tinygradient-1.1.5.tgz", + "integrity": "sha512-8nIfc2vgQ4TeLnk2lFj4tRLvvJwEfQuabdsmvDdQPT0xlk9TaNtpGd6nNRxXoK6vQhN6RSzj+Cnp5tTQmpxmbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/tinycolor2": "^1.4.0", + "tinycolor2": "^1.0.0" + } + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -12034,15 +12238,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/undici": { - "version": "6.24.1", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.24.1.tgz", - "integrity": "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA==", - "license": "MIT", - "engines": { - "node": ">=18.17" - } - }, "node_modules/undici-types": { "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", diff --git a/package.json b/package.json index 1c647f8..064e622 100644 --- a/package.json +++ b/package.json @@ -5,18 +5,17 @@ "scripts": { "start": "expo start", "reset-project": "node ./scripts/reset-project.js", - "android": "expo start --android", - "ios": "expo start --ios", + "android": "expo run:android", + "ios": "expo run:ios", "web": "expo start --web", "lint": "expo lint", - "agent-qa": "node ./scripts/agent-qa/index.ts" + "agent-qa": "cali qa --env mobile-pr --quiet" }, "dependencies": { "@expo/vector-icons": "^15.0.3", "@react-navigation/bottom-tabs": "^7.4.0", "@react-navigation/elements": "^2.6.3", "@react-navigation/native": "^7.1.8", - "@vercel/blob": "^2.3.1", "expo": "^55.0.8", "expo-constants": "~55.0.9", "expo-font": "~55.0.4", @@ -41,8 +40,7 @@ }, "devDependencies": { "@types/react": "~19.2.10", - "agent-device": "^0.10.1", - "ai": "^6.0.116", + "cali": "0.4.0-0", "eslint": "^9.25.0", "eslint-config-expo": "~55.0.0", "typescript": "~5.9.2" diff --git a/scripts/agent-qa/index.ts b/scripts/agent-qa/index.ts deleted file mode 100644 index 138e494..0000000 --- a/scripts/agent-qa/index.ts +++ /dev/null @@ -1,946 +0,0 @@ -import { execFile as execFileCallback } from 'node:child_process'; -import { existsSync } from 'node:fs'; -import { mkdir, readFile, readdir, stat, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import process from 'node:process'; -import { promisify } from 'node:util'; - -import { put } from '@vercel/blob'; -import { ToolLoopAgent, gateway, jsonSchema } from 'ai'; - -type SkillMetadata = { - name: string; - description: string; - directoryPath: string; - skillFilePath: string; -}; - -type QaPlatform = 'android' | 'ios'; - -type ScreenshotInfo = { - fileName: string; - absolutePath: string; - bytes: number; - label?: string; - blobUrl?: string; - blobDownloadUrl?: string; - blobPathname?: string; - uploadError?: string; -}; - -type ScreenshotLabel = { - fileName: string; - label: string; -}; - -type AgentDeviceTraceEntry = { - command: string; - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; -}; - -type ResultStatus = 'passed' | 'failed' | 'blocked' | 'not_tested' | 'unsure'; - -type ReportInput = { - overallStatus: ResultStatus; - summary: string; - checked?: string[]; - issues?: string[]; - nextSteps?: string[]; - screenshotLabels?: ScreenshotLabel[]; -}; - -type Report = ReportInput & { - generatedAt: string; - model: string; - buildId: string; - workflowUrl: string; - platform: QaPlatform; - platformLabel: string; - prNumber: number; - screenshots: ScreenshotInfo[]; - agentDeviceTrace: AgentDeviceTraceEntry[]; -}; - -type ParsedPr = { - number?: number; - title?: string; - body?: string | null; - draft?: boolean; - labels?: Array<{ name?: string }>; -}; - -type CommandResult = { - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; -}; - -type CommandOptions = { - cwd?: string; - allowFailure?: boolean; -}; - -type ExecFileError = Error & { - stdout?: string; - stderr?: string; - code?: number | string; -}; - -const execFile = promisify(execFileCallback); -const ROOT_DIR = process.cwd(); -const ARTIFACTS_DIR = path.join(ROOT_DIR, 'artifacts', 'qa'); -const SCREENSHOTS_DIR = path.join(tmpdir(), 'agent-qa-screenshots'); -const REPORT_PATH = path.join(ARTIFACTS_DIR, 'report.json'); -const SECTION_PATH = path.join(ARTIFACTS_DIR, 'section.md'); -const STATUS_PATH = path.join(ARTIFACTS_DIR, 'status.txt'); -const AGENT_DEVICE_BIN = 'agent-device'; -const QA_PLATFORM = normalizePlatform(process.env.QA_PLATFORM); -const APP_PATH = process.env.APP_PATH; -const BOOTSTRAP_ERROR = process.env.AGENT_QA_BOOTSTRAP_ERROR; -const BLOB_READ_WRITE_TOKEN = process.env.BLOB_READ_WRITE_TOKEN; -const MODEL_ID = process.env.QA_MODEL || 'openai/gpt-5.4-mini'; -const EMPTY_INPUT_SCHEMA = jsonSchema({ - type: 'object', - properties: {}, - additionalProperties: false, -}); -const SKILL_DIRECTORIES = [ - path.join(ROOT_DIR, 'node_modules', 'agent-device', 'skills'), -]; - -const pr = parseJson(process.env.PR_JSON, {}); -const context = { - platform: QA_PLATFORM, - platformLabel: QA_PLATFORM === 'ios' ? 'iOS' : 'Android', - buildId: process.env.BUILD_ID || '', - buildPath: APP_PATH || '', - prNumber: Number(pr.number || 0), - workflowUrl: process.env.WORKFLOW_URL || '', - applicationId: process.env.APPLICATION_ID || '', - deviceName: - process.env.DEVICE_NAME || - (QA_PLATFORM === 'ios' - ? process.env.AGENT_DEVICE_IOS_DEVICE || '' - : process.env.AGENT_DEVICE_ANDROID_DEVICE || ''), -}; -const agentDeviceTrace: AgentDeviceTraceEntry[] = []; - -function normalizePlatform(value: string | undefined): QaPlatform { - return value === 'ios' ? 'ios' : 'android'; -} - -function parseJson(value: string | undefined, fallback: T): T { - if (!value) { - return fallback; - } - - try { - return JSON.parse(value) as T; - } catch { - return fallback; - } -} - -function trim(value: string, max = 6000): string { - if (value.length <= max) { - return value; - } - - return `${value.slice(0, max)}\n...`; -} - -function humanizeScreenshotLabel(fileName: string): string { - const stem = fileName.replace(/\.[^.]+$/, ''); - const words = stem - .split(/[-_]+/g) - .filter(Boolean) - .map((word) => word.charAt(0).toUpperCase() + word.slice(1)); - return words.join(' ') || fileName; -} - -function stripFrontmatter(content: string): string { - const match = content.match(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/); - return match ? content.slice(match[0].length).trim() : content.trim(); -} - -function parseFrontmatter(content: string): { - name: string; - description: string; -} { - const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/); - if (!match?.[1]) { - throw new Error('No frontmatter found'); - } - - const frontmatter = match[1]; - const nameMatch = frontmatter.match(/^name:\s*(.+)$/m); - const descriptionMatch = frontmatter.match(/^description:\s*(.+)$/m); - const name = nameMatch?.[1]?.trim().replace(/^['"]|['"]$/g, ''); - const description = descriptionMatch?.[1] - ?.trim() - .replace(/^['"]|['"]$/g, ''); - - if (!name || !description) { - throw new Error('Skill frontmatter is missing name or description'); - } - - return { name, description }; -} - -async function discoverSkills(directories: string[]): Promise { - const skills: SkillMetadata[] = []; - const seenNames = new Set(); - - for (const directory of directories) { - let entries; - try { - entries = await readdir(directory, { withFileTypes: true }); - } catch { - continue; - } - - for (const entry of entries) { - if (!entry.isDirectory()) { - continue; - } - - const skillDirectoryPath = path.join(directory, entry.name); - const skillFilePath = path.join(skillDirectoryPath, 'SKILL.md'); - - try { - const content = await readFile(skillFilePath, 'utf8'); - const frontmatter = parseFrontmatter(content); - - if (seenNames.has(frontmatter.name.toLowerCase())) { - continue; - } - - seenNames.add(frontmatter.name.toLowerCase()); - skills.push({ - name: frontmatter.name, - description: frontmatter.description, - directoryPath: skillDirectoryPath, - skillFilePath, - }); - } catch { - continue; - } - } - } - - return skills.sort((left, right) => left.name.localeCompare(right.name)); -} - -function buildSkillsPrompt(skills: SkillMetadata[]): string { - if (skills.length === 0) { - return 'No local skills were discovered for this run.'; - } - - const skillList = skills - .map((skill) => `- ${skill.name}: ${skill.description}`) - .join('\n'); - - return [ - 'Available local skills:', - skillList, - '', - 'Load a skill before relying on its instructions. Use read_skill_file only for files inside the loaded skill directory.', - ].join('\n'); -} - -function findSkill(skills: SkillMetadata[], name: string): SkillMetadata { - const skill = skills.find( - (candidate) => candidate.name.toLowerCase() === name.toLowerCase(), - ); - - if (!skill) { - throw new Error(`Skill not found: ${name}`); - } - - return skill; -} - -function resolveSkillFilePath(skill: SkillMetadata, relativeFilePath: string): string { - const absolutePath = path.resolve(skill.directoryPath, relativeFilePath); - const relativePath = path.relative(skill.directoryPath, absolutePath); - const normalizedRelativePath = relativePath.split(path.sep).join('/'); - - if ( - normalizedRelativePath === '' || - normalizedRelativePath.startsWith('../') || - normalizedRelativePath === '..' - ) { - throw new Error( - `Refusing to read a path outside the skill directory: ${relativeFilePath}`, - ); - } - - return absolutePath; -} - -function ensureRequiredAgentQaEnvs(): void { - if (!process.env.AI_GATEWAY_API_KEY) { - throw new Error( - 'Missing required environment variable: AI_GATEWAY_API_KEY', - ); - } - if (!APP_PATH) { - throw new Error('Missing required environment variable: APP_PATH'); - } - if (!context.applicationId) { - throw new Error('Missing required environment variable: APPLICATION_ID'); - } - if (context.platform === 'ios' && !context.deviceName) { - throw new Error( - 'Missing required environment variable: AGENT_DEVICE_IOS_DEVICE', - ); - } -} - -async function runCommand( - file: string, - args: string[], - options: CommandOptions = {}, -): Promise { - const { cwd = ROOT_DIR, allowFailure = false } = options; - - try { - const result = await execFile(file, args, { - cwd, - env: process.env, - maxBuffer: 20 * 1024 * 1024, - }); - - return { - ok: true, - exitCode: 0, - stdout: result.stdout ?? '', - stderr: result.stderr ?? '', - }; - } catch (unknownError) { - const error = unknownError as ExecFileError; - const stdout = typeof error.stdout === 'string' ? error.stdout : ''; - const stderr = - typeof error.stderr === 'string' ? error.stderr : error.message; - const exitCode = typeof error.code === 'number' ? error.code : 1; - - if (!allowFailure) { - throw new Error( - [`Command failed: ${file} ${args.join(' ')}`, stderr || stdout] - .filter(Boolean) - .join('\n\n'), - ); - } - - return { - ok: false, - exitCode, - stdout, - stderr, - }; - } -} - -async function runAgentDeviceCommand(command: string, args: string[] = []): Promise<{ - ok: boolean; - exitCode: number; - stdout: string; - stderr: string; - json: unknown; -}> { - const result = await runCommand(AGENT_DEVICE_BIN, [command, ...args], { - allowFailure: true, - }); - - agentDeviceTrace.push({ - command: [command, ...args].join(' '), - ok: result.ok, - exitCode: result.exitCode, - stdout: trim(result.stdout, 4000), - stderr: trim(result.stderr, 2000), - }); - - return { - ok: result.ok, - exitCode: result.exitCode, - stdout: trim(result.stdout, 8000), - stderr: trim(result.stderr, 4000), - json: parseJson(result.stdout, null as unknown), - }; -} - -async function ensureArtifactsDir(): Promise { - await mkdir(ARTIFACTS_DIR, { recursive: true }); -} - -async function ensureScreenshotsDir(): Promise { - await mkdir(SCREENSHOTS_DIR, { recursive: true }); -} - -async function listScreenshots(): Promise { - if (!existsSync(SCREENSHOTS_DIR)) { - return []; - } - - const entries = await readdir(SCREENSHOTS_DIR); - const screenshots: ScreenshotInfo[] = []; - for (const entry of entries) { - if (!entry.endsWith('.png')) { - continue; - } - - const absolutePath = path.join(SCREENSHOTS_DIR, entry); - const fileStat = await stat(absolutePath); - screenshots.push({ - fileName: entry, - absolutePath, - bytes: fileStat.size, - }); - } - - return screenshots.sort((left, right) => - left.fileName.localeCompare(right.fileName), - ); -} - -async function uploadScreenshotsToBlob( - screenshots: ScreenshotInfo[], -): Promise { - if (!BLOB_READ_WRITE_TOKEN || screenshots.length === 0) { - return screenshots; - } - - return Promise.all( - screenshots.map(async (screenshot) => { - try { - const fileBuffer = await readFile(screenshot.absolutePath); - const pathnameParts = [ - 'agent-qa', - context.platform, - context.prNumber ? `pr-${context.prNumber}` : 'pr-unknown', - context.buildId || 'local-build', - screenshot.fileName, - ]; - const pathname = pathnameParts.join('/'); - const blob = await put(pathname, fileBuffer, { - access: 'public', - addRandomSuffix: true, - contentType: 'image/png', - token: BLOB_READ_WRITE_TOKEN, - }); - - return { - ...screenshot, - blobUrl: blob.url, - blobDownloadUrl: blob.downloadUrl, - blobPathname: blob.pathname, - }; - } catch (unknownError) { - const error = - unknownError instanceof Error - ? unknownError - : new Error(String(unknownError)); - - console.error( - `Failed to upload screenshot ${screenshot.fileName} to Vercel Blob: ${error.message}`, - ); - - return { - ...screenshot, - uploadError: error.message, - }; - } - }), - ); -} - -async function writeBlockedReport(error: Error): Promise { - const summary: ReportInput = { - overallStatus: 'blocked', - summary: error.message, - checked: [ - `Attempted to run ${context.platformLabel} QA agent on PR changes`, - ], - issues: [error.message], - nextSteps: [ - 'Check the workflow logs for command failures.', - `Verify AI_GATEWAY_API_KEY, ${context.platformLabel} build availability, and ${context.platform === 'ios' ? 'simulator' : 'emulator'} configuration.`, - ], - }; - - await persistReport(summary); -} - -async function persistReport(reportInput: ReportInput) { - await ensureArtifactsDir(); - await ensureScreenshotsDir(); - const screenshotLabelMap = new Map( - (reportInput.screenshotLabels || []) - .filter( - (item): item is ScreenshotLabel => - Boolean(item?.fileName) && Boolean(item?.label), - ) - .map((item) => [item.fileName, item.label.trim()]), - ); - const screenshots = (await uploadScreenshotsToBlob(await listScreenshots())).map( - (screenshot) => ({ - ...screenshot, - label: - screenshotLabelMap.get(screenshot.fileName) || - humanizeScreenshotLabel(screenshot.fileName), - }), - ); - const report: Report = { - generatedAt: new Date().toISOString(), - model: MODEL_ID, - buildId: context.buildId, - workflowUrl: context.workflowUrl, - platform: context.platform, - platformLabel: context.platformLabel, - prNumber: context.prNumber, - screenshots, - agentDeviceTrace: agentDeviceTrace.slice(-20), - ...reportInput, - }; - - await writeFile(REPORT_PATH, `${JSON.stringify(report, null, 2)}\n`, 'utf8'); - await writeFile(SECTION_PATH, trim(renderPlatformSection(report), 16000), 'utf8'); - await writeFile(STATUS_PATH, `${report.overallStatus}\n`, 'utf8'); -} - -function renderScreenshotRows( - screenshots: ScreenshotInfo[], - platformLabel: string, -): string[] { - if (screenshots.length === 0) { - return ['- No screenshots were saved.']; - } - - const screenshotRows = screenshots.map((screenshot) => { - if (screenshot.blobUrl) { - return `| ${screenshot.fileName} |`; - } - - const details = [screenshot.fileName, `${screenshot.bytes} bytes`]; - if (screenshot.uploadError) { - details.push(`upload failed: ${screenshot.uploadError}`); - } - - return details.join(', '); - }); - - if (screenshots.some((screenshot) => screenshot.blobUrl)) { - return [ - `| ${platformLabel} |`, - '| --- |', - ...screenshotRows.filter((row) => row.startsWith('|')), - ]; - } - - return screenshotRows - .filter((value) => !value.startsWith('|')) - .map((row) => `- ${row}`); -} - -function getStatusEmoji(status: ResultStatus): string { - switch (status) { - case 'passed': - return '✅'; - case 'failed': - return '❌'; - case 'blocked': - return '⛔'; - case 'unsure': - return '🤔'; - case 'not_tested': - default: - return '⚪'; - } -} - -function renderPlatformSection(report: Report): string { - const lines = [ - `### ${report.platformLabel}`, - '', - `**Status:** ${getStatusEmoji(report.overallStatus)} ${report.overallStatus}`, - '', - report.summary || 'No summary was provided.', - '', - '### Checked', - ]; - - if (report.checked?.length) { - for (const item of report.checked) { - lines.push(`- ${item}`); - } - } else { - lines.push('- No checks were recorded.'); - } - - lines.push('', '### Issues'); - if (report.issues?.length) { - for (const issue of report.issues) { - lines.push(`- ${issue}`); - } - } else { - lines.push('- No issues noted.'); - } - - lines.push('', '### Screenshots'); - lines.push(...renderScreenshotRows(report.screenshots || [], report.platformLabel)); - - lines.push('', '### Next steps'); - if (report.nextSteps?.length) { - for (const step of report.nextSteps) { - lines.push(`- ${step}`); - } - } else { - lines.push('- No follow-up actions were suggested.'); - } - - lines.push('', '### Metadata'); - lines.push(`- Build ID: \`${report.buildId || 'n/a'}\``); - lines.push(`- Workflow: ${report.workflowUrl || 'n/a'}`); - lines.push('', '### JSON Report', ''); - lines.push('```json'); - lines.push(JSON.stringify(report, null, 2)); - lines.push('```'); - - return `${lines.join('\n')}\n`; -} - -function buildPrompt(skills: SkillMetadata[]): string { - const prTitle = pr.title || 'Untitled PR'; - const prBody = pr.body || 'No PR body was provided.'; - const platformSpecificContext = - context.platform === 'ios' - ? [`- Preferred iOS simulator: ${context.deviceName || 'n/a'}`] - : [`- Preferred Android device: ${context.deviceName || 'n/a'}`]; - const platformSpecificFlow = - context.platform === 'ios' - ? `For iOS simulator runs, the workflow already booted the app on ${context.deviceName}. Do not pass --device, --udid, or --session in normal app commands.` - : `For Android runs, the workflow already booted the app on ${context.deviceName || 'the booted emulator'}.`; - - return [ - `Review this pull request and run a lightweight ${context.platformLabel} QA pass.`, - '', - `PR #${context.prNumber}: ${prTitle}`, - '', - prBody, - '', - 'Execution context:', - `- Build ID: ${context.buildId || 'n/a'}`, - `- Build path: ${context.buildPath || 'n/a'}`, - `- Platform: ${context.platformLabel}`, - `- Application id: ${context.applicationId || 'n/a'}`, - ...platformSpecificContext, - `- Workflow URL: ${context.workflowUrl || 'n/a'}`, - `- Temporary screenshot directory: ${SCREENSHOTS_DIR}`, - '', - buildSkillsPrompt(skills), - '', - platformSpecificFlow, - `You must infer concise acceptance criteria from the PR, test only the highest-signal ${context.platformLabel} flows, load the relevant local skill before relying on it, save temporary screenshots into ${SCREENSHOTS_DIR}/*.png, and call write_report exactly once before finishing.`, - 'When you need to verify that text is actually visible on screen, prefer plain snapshot over snapshot -i. Use snapshot -i mainly for exploration and choosing refs.', - 'Use short, descriptive screenshot file names and include matching screenshotLabels with brief route or state labels like Home, Explore, or Welcome screen.', - 'If the accessibility tree or snapshot text is inconclusive but the screenshots likely show the changed UI, use overallStatus "unsure" instead of "blocked" or "failed".', - 'Do not end with plain text. Your final action must be a write_report tool call.', - ].join('\n'); -} - -function hasToolActivity( - steps: Array<{ - toolCalls?: Array<{ toolName?: string }>; - toolResults?: Array<{ toolName?: string }>; - }>, - toolName: string, -): boolean { - return steps.some((step) => { - const calledTool = step.toolCalls?.some((call) => call.toolName === toolName); - const completedTool = step.toolResults?.some( - (result) => result.toolName === toolName, - ); - return Boolean(calledTool || completedTool); - }); -} - -async function main(): Promise { - await ensureArtifactsDir(); - await ensureScreenshotsDir(); - ensureRequiredAgentQaEnvs(); - if (BOOTSTRAP_ERROR) { - await writeBlockedReport(new Error(BOOTSTRAP_ERROR)); - return; - } - const skills = await discoverSkills(SKILL_DIRECTORIES); - - const agent = new ToolLoopAgent({ - model: gateway(MODEL_ID), - instructions: [ - `You are a ${context.platformLabel} QA agent running inside EAS Workflows.`, - 'Treat the app and repository as a black box.', - 'Infer a short list of acceptance criteria from PR metadata, focusing on user-visible behavior.', - 'The workflow has already installed and launched the app before the agent starts.', - 'Use the local skills list in the prompt. Load a relevant skill before making non-trivial command choices.', - context.platform === 'ios' - ? `For iOS simulator runs, the workflow already booted and bound the simulator ${context.deviceName}. Do not pass --device, --udid, --serial, or --session in normal app commands.` - : 'For Android runs, the workflow already booted and bound the emulator.', - 'When verifying whether text is visible on screen, prefer plain snapshot. Use snapshot -i mainly for interactive exploration and choosing refs.', - `Take screenshots for meaningful states and save them temporarily in ${SCREENSHOTS_DIR} with .png filenames.`, - 'After any UI transition, refresh your understanding with snapshot or diff snapshot.', - 'Do not inspect repository source files, run git commands, or modify project code. The only allowed filesystem writes are the QA report files and temporary screenshots.', - 'Do not claim success without evidence from tool results.', - 'The workflow pre-binds the mobile target. Avoid explicit routing flags like --device, --udid, --serial, or --session in normal app commands unless you are inspecting device inventory.', - 'When you save screenshots, use short descriptive file names and include matching screenshotLabels in write_report so the PR comment can label them clearly.', - 'If text-based automation evidence is inconclusive but screenshots likely show the relevant UI, report overallStatus as unsure.', - 'If a prerequisite is missing or the environment is broken, mark the relevant checks as blocked.', - 'When you are done with the simulator or emulator session, prefer close --shutdown.', - 'You must call write_report exactly once before you finish.', - 'Never finish by returning plain text. Finish only by calling write_report.', - ].join(' '), - toolChoice: 'required', - prepareStep: async ({ steps, stepNumber }) => { - const hasWrittenReport = hasToolActivity(steps, 'write_report'); - const hasUsedDeviceTools = hasToolActivity(steps, 'agent_device'); - - if (hasWrittenReport || !hasUsedDeviceTools || stepNumber < 6) { - return undefined; - } - - return { - activeTools: ['write_report'], - toolChoice: { type: 'tool', toolName: 'write_report' }, - }; - }, - tools: { - get_pr_context: { - description: - 'Read the GitHub pull request context and workflow metadata for this QA run.', - inputSchema: EMPTY_INPUT_SCHEMA, - execute: async () => ({ - prNumber: context.prNumber, - title: pr.title || '', - body: pr.body || '', - labels: Array.isArray(pr.labels) - ? pr.labels.map((label) => label.name).filter(Boolean) - : [], - draft: Boolean(pr.draft), - buildId: context.buildId, - buildPath: context.buildPath, - workflowUrl: context.workflowUrl, - platform: context.platform, - platformLabel: context.platformLabel, - applicationId: context.applicationId, - deviceName: context.deviceName, - }), - }, - load_skill: { - description: - 'Load a local skill and return its instructions plus the skill directory path.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - name: { - type: 'string', - description: 'Skill name from the available local skills list.', - }, - }, - required: ['name'], - additionalProperties: false, - }), - execute: async ({ name }: { name: string }) => { - const skill = findSkill(skills, name); - const content = await readFile(skill.skillFilePath, 'utf8'); - return { - name: skill.name, - description: skill.description, - skillDirectory: skill.directoryPath, - skillFilePath: skill.skillFilePath, - content: stripFrontmatter(content), - }; - }, - }, - read_skill_file: { - description: - 'Read a text file inside a loaded skill directory, such as references or scripts.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - skillName: { - type: 'string', - description: 'Skill name from the available local skills list.', - }, - path: { - type: 'string', - description: - 'Path relative to the skill directory, such as references/foo.md.', - }, - startLine: { - type: 'integer', - minimum: 1, - description: '1-based line number to start reading from.', - }, - maxLines: { - type: 'integer', - minimum: 1, - maximum: 400, - description: 'Maximum number of lines to read.', - }, - }, - required: ['skillName', 'path'], - additionalProperties: false, - }), - execute: async ({ - skillName, - path: relativeFilePath, - startLine = 1, - maxLines = 200, - }: { - skillName: string; - path: string; - startLine?: number; - maxLines?: number; - }) => { - const skill = findSkill(skills, skillName); - const absolutePath = resolveSkillFilePath(skill, relativeFilePath); - const content = await readFile(absolutePath, 'utf8'); - const lines = content.split('\n'); - const slice = lines.slice( - Math.max(startLine - 1, 0), - Math.max(startLine - 1, 0) + maxLines, - ); - - return { - skillName: skill.name, - absolutePath, - startLine, - endLine: startLine + slice.length - 1, - content: slice.join('\n'), - }; - }, - }, - agent_device: { - description: - 'Run an agent-device command for mobile UI automation and screenshot capture.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - command: { - type: 'string', - description: - 'The first agent-device subcommand to run, such as devices, reinstall, open, snapshot, press, fill, or screenshot.', - }, - args: { - type: 'array', - items: { type: 'string' }, - description: - `Remaining CLI arguments. Use ${SCREENSHOTS_DIR}/*.png for screenshots.`, - }, - }, - required: ['command'], - additionalProperties: false, - }), - execute: async ({ - command, - args = [], - }: { - command: string; - args?: string[]; - }) => runAgentDeviceCommand(command, args), - }, - write_report: { - description: - 'Persist the final QA summary, findings, and screenshot index to artifacts/qa.', - inputSchema: jsonSchema({ - type: 'object', - properties: { - overallStatus: { - type: 'string', - enum: ['passed', 'failed', 'blocked', 'not_tested', 'unsure'], - }, - summary: { - type: 'string', - }, - checked: { - type: 'array', - items: { type: 'string' }, - }, - issues: { - type: 'array', - items: { type: 'string' }, - }, - nextSteps: { - type: 'array', - items: { type: 'string' }, - }, - screenshotLabels: { - type: 'array', - items: { - type: 'object', - properties: { - fileName: { - type: 'string', - description: 'Saved screenshot file name, including .png.', - }, - label: { - type: 'string', - description: - 'Very short route or state label for this screenshot, such as Home or Welcome screen.', - }, - }, - required: ['fileName', 'label'], - additionalProperties: false, - }, - }, - }, - required: ['overallStatus', 'summary'], - additionalProperties: false, - }), - execute: async (input: ReportInput) => persistReport(input), - }, - }, - }); - - const result = await agent.generate({ - prompt: buildPrompt(skills), - }); - - if (result.text) { - console.log(trim(`Agent finished with final text:\n${result.text}`, 4000)); - } - - if (!existsSync(SECTION_PATH)) { - await persistReport({ - overallStatus: 'blocked', - summary: result.text || 'The agent completed without calling write_report.', - checked: [`Produce a ${context.platformLabel} QA report`], - issues: ['The write_report tool was not called by the agent.'], - nextSteps: [ - 'Inspect the workflow logs and tighten the agent instructions.', - ], - }); - console.log( - `Fallback QA report written to ${SECTION_PATH} because write_report was not called.`, - ); - return; - } - - console.log(`QA report written to ${SECTION_PATH}`); -} - -try { - await main(); -} catch (unknownError) { - const message = - unknownError instanceof Error - ? unknownError - : new Error(String(unknownError)); - console.error(message); - await writeBlockedReport(message); - process.exitCode = 1; -} diff --git a/scripts/agent-qa/package.json b/scripts/agent-qa/package.json deleted file mode 100644 index 3dbc1ca..0000000 --- a/scripts/agent-qa/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "module" -} diff --git a/scripts/agent-qa/provision-ios-simulator.sh b/scripts/agent-qa/provision-ios-simulator.sh deleted file mode 100644 index f708763..0000000 --- a/scripts/agent-qa/provision-ios-simulator.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -DEVICE_NAME="${AGENT_DEVICE_IOS_DEVICE:?AGENT_DEVICE_IOS_DEVICE is required}" -export AGENT_DEVICE_DAEMON_TIMEOUT_MS="${AGENT_DEVICE_DAEMON_TIMEOUT_MS:-180000}" -export AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS="${AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS:-180000}" - -agent-device ensure-simulator --platform ios --device "${DEVICE_NAME}" --boot - -if command -v set-env >/dev/null 2>&1; then - set-env AGENT_DEVICE_IOS_DEVICE "${DEVICE_NAME}" -fi diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index 55a3def..f1ba9f9 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -5,6 +5,11 @@ set -uo pipefail APP_PATH_ARG="${1:?APP_PATH argument is required}" QA_PLATFORM_VALUE="${QA_PLATFORM:?QA_PLATFORM is required}" APPLICATION_ID_VALUE="${APPLICATION_ID:?APPLICATION_ID is required}" +OUTPUT_DIR="artifacts/qa" +CONTEXT_PATH="${OUTPUT_DIR}/cali-context.json" +SCREENSHOTS_DIR="${OUTPUT_DIR}/screenshots" + +mkdir -p "${OUTPUT_DIR}" case "${QA_PLATFORM_VALUE}" in ios) @@ -18,37 +23,64 @@ case "${QA_PLATFORM_VALUE}" in ;; esac -set +e export APP_PATH="${APP_PATH_ARG}" - -BOOTSTRAP_ERROR="" -if [ "${QA_PLATFORM_VALUE}" = "android" ]; then - BOOTSTRAP_STEP="install" - agent-device install "${APPLICATION_ID_VALUE}" "${APP_PATH}" -else - BOOTSTRAP_STEP="reinstall" - agent-device reinstall "${APPLICATION_ID_VALUE}" "${APP_PATH}" -fi -BOOTSTRAP_EXIT=$? - -if [ "${BOOTSTRAP_EXIT}" -ne 0 ] && [ "${QA_PLATFORM_VALUE}" = "android" ]; then - BOOTSTRAP_STEP="reinstall" - agent-device reinstall "${APPLICATION_ID_VALUE}" "${APP_PATH}" - BOOTSTRAP_EXIT=$? -fi - -if [ "${BOOTSTRAP_EXIT}" -eq 0 ]; then - BOOTSTRAP_STEP="open" - agent-device open "${APPLICATION_ID_VALUE}" --relaunch - BOOTSTRAP_EXIT=$? +DEVICE_NAME_VALUE="${DEVICE_NAME:-}" +if [ -z "${DEVICE_NAME_VALUE}" ]; then + if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then + DEVICE_NAME_VALUE="${AGENT_DEVICE_IOS_DEVICE:-}" + else + DEVICE_NAME_VALUE="${AGENT_DEVICE_ANDROID_DEVICE:-}" + fi fi -if [ "${BOOTSTRAP_EXIT}" -ne 0 ]; then - BOOTSTRAP_ERROR="Deterministic ${PLATFORM_LABEL} app bootstrap failed during ${BOOTSTRAP_STEP}. See workflow logs above." -fi +jq -n \ + --arg workspaceRoot "${PWD}" \ + --arg platform "${QA_PLATFORM_VALUE}" \ + --arg artifactPath "${APP_PATH_ARG}" \ + --arg appId "${APPLICATION_ID_VALUE}" \ + --arg deviceName "${DEVICE_NAME_VALUE}" \ + --arg buildId "${BUILD_ID:-}" \ + --arg workflowUrl "${WORKFLOW_URL:-}" \ + --arg outputDir "${OUTPUT_DIR}" \ + --arg screenshotsDir "${SCREENSHOTS_DIR}" \ + --argjson pr "${PR_JSON:-{}}" \ + ' + { + workspaceRoot: $workspaceRoot, + pullRequest: + if ($pr | type) == "object" and (($pr | keys) | length) > 0 then + { + number: ($pr.number // null), + title: ($pr.title // null), + body: ($pr.body // null), + url: ($pr.html_url // $pr.url // null), + labels: (($pr.labels // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), + isDraft: ($pr.draft // false), + baseBranch: ($pr.base.ref // null), + headBranch: ($pr.head.ref // null) + } + else + null + end, + mobile: { + platform: $platform, + artifactPath: $artifactPath, + appId: $appId, + deviceName: if $deviceName == "" then null else $deviceName end + }, + build: { + id: if $buildId == "" then null else $buildId end, + workflowUrl: if $workflowUrl == "" then null else $workflowUrl end + }, + output: { + outputDir: $outputDir, + screenshotsDir: $screenshotsDir + } + } + ' > "${CONTEXT_PATH}" -export AGENT_QA_BOOTSTRAP_ERROR="${BOOTSTRAP_ERROR}" -npm run agent-qa +set +e +npm run agent-qa -- --context "${CONTEXT_PATH}" EXIT_CODE=$? STATUS="$(cat artifacts/qa/status.txt 2>/dev/null || printf blocked)" From 1032668d14568f8791358017f7034424dcd8b25a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 18:44:06 +0200 Subject: [PATCH 02/17] fix: harden cali workflow bootstrap --- .eas/workflows/agent-qa-mobile.yml | 4 +- scripts/agent-qa/run-and-export.sh | 104 ++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 31ee18e..8cca125 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 + npm install -g agent-device@0.10.1 cali@0.4.0-0 - uses: eas/download_build id: download_build with: @@ -146,7 +146,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 + npm install -g agent-device@0.10.1 cali@0.4.0-0 - uses: eas/download_build id: download_build with: diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index f1ba9f9..d0ccac5 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -47,36 +47,52 @@ jq -n \ ' { workspaceRoot: $workspaceRoot, - pullRequest: - if ($pr | type) == "object" and (($pr | keys) | length) > 0 then - { - number: ($pr.number // null), - title: ($pr.title // null), - body: ($pr.body // null), - url: ($pr.html_url // $pr.url // null), - labels: (($pr.labels // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), - isDraft: ($pr.draft // false), - baseBranch: ($pr.base.ref // null), - headBranch: ($pr.head.ref // null) - } - else - null - end, - mobile: { - platform: $platform, - artifactPath: $artifactPath, - appId: $appId, - deviceName: if $deviceName == "" then null else $deviceName end - }, - build: { - id: if $buildId == "" then null else $buildId end, - workflowUrl: if $workflowUrl == "" then null else $workflowUrl end - }, + mobile: ( + { + platform: $platform, + artifactPath: $artifactPath, + appId: $appId + } + + (if $deviceName == "" then {} else {deviceName: $deviceName} end) + ), output: { outputDir: $outputDir, screenshotsDir: $screenshotsDir } } + + ( + if ($pr | type) == "object" and (($pr | keys) | length) > 0 then + { + pullRequest: ( + { + labels: (($pr.labels // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), + isDraft: ($pr.draft // false) + } + + (if $pr.number == null then {} else {number: $pr.number} end) + + (if $pr.title == null then {} else {title: $pr.title} end) + + {body: ($pr.body // null)} + + (if ($pr.html_url // $pr.url) == null then {} else {url: ($pr.html_url // $pr.url)} end) + + (if $pr.base.ref == null then {} else {baseBranch: $pr.base.ref} end) + + (if $pr.head.ref == null then {} else {headBranch: $pr.head.ref} end) + ) + } + else + {} + end + ) + + ( + if $buildId == "" and $workflowUrl == "" then + {} + else + { + build: ( + {} + + (if $buildId == "" then {} else {id: $buildId} end) + + (if $workflowUrl == "" then {} else {workflowUrl: $workflowUrl} end) + ) + } + end + ) ' > "${CONTEXT_PATH}" set +e @@ -116,6 +132,44 @@ No ${PLATFORM_LABEL} QA section was produced. " fi +if [ ! -f artifacts/qa/report.json ]; then + FALLBACK_SUMMARY="The Cali QA command failed before it could publish a report. Check the run_agent_qa logs above." + cat > artifacts/qa/status.txt < artifacts/qa/section.md < artifacts/qa/report.json +fi + if [ -f artifacts/qa/report.json ]; then TOP_ISSUE="$( jq -r ' From c2e434248dea5b50a5a6df481860a7286a5234a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 18:51:35 +0200 Subject: [PATCH 03/17] fix: tolerate missing pr json in cali wrapper --- scripts/agent-qa/run-and-export.sh | 31 ++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index d0ccac5..d047a1d 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -8,6 +8,7 @@ APPLICATION_ID_VALUE="${APPLICATION_ID:?APPLICATION_ID is required}" OUTPUT_DIR="artifacts/qa" CONTEXT_PATH="${OUTPUT_DIR}/cali-context.json" SCREENSHOTS_DIR="${OUTPUT_DIR}/screenshots" +PR_JSON_PATH="${OUTPUT_DIR}/pr.json" mkdir -p "${OUTPUT_DIR}" @@ -33,6 +34,12 @@ if [ -z "${DEVICE_NAME_VALUE}" ]; then fi fi +if printf '%s' "${PR_JSON:-}" | jq -c . > "${PR_JSON_PATH}" 2>/dev/null; then + : +else + printf '{}\n' > "${PR_JSON_PATH}" +fi + jq -n \ --arg workspaceRoot "${PWD}" \ --arg platform "${QA_PLATFORM_VALUE}" \ @@ -43,7 +50,7 @@ jq -n \ --arg workflowUrl "${WORKFLOW_URL:-}" \ --arg outputDir "${OUTPUT_DIR}" \ --arg screenshotsDir "${SCREENSHOTS_DIR}" \ - --argjson pr "${PR_JSON:-{}}" \ + --slurpfile prFile "${PR_JSON_PATH}" \ ' { workspaceRoot: $workspaceRoot, @@ -61,19 +68,19 @@ jq -n \ } } + ( - if ($pr | type) == "object" and (($pr | keys) | length) > 0 then + if (($prFile[0] // {}) | type) == "object" and (((($prFile[0] // {}) | keys) | length) > 0) then { pullRequest: ( { - labels: (($pr.labels // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), - isDraft: ($pr.draft // false) + labels: (((($prFile[0] // {}).labels) // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), + isDraft: ((($prFile[0] // {}).draft) // false) } - + (if $pr.number == null then {} else {number: $pr.number} end) - + (if $pr.title == null then {} else {title: $pr.title} end) - + {body: ($pr.body // null)} - + (if ($pr.html_url // $pr.url) == null then {} else {url: ($pr.html_url // $pr.url)} end) - + (if $pr.base.ref == null then {} else {baseBranch: $pr.base.ref} end) - + (if $pr.head.ref == null then {} else {headBranch: $pr.head.ref} end) + + (if (($prFile[0] // {}).number) == null then {} else {number: (($prFile[0] // {}).number)} end) + + (if (($prFile[0] // {}).title) == null then {} else {title: (($prFile[0] // {}).title)} end) + + {body: ((($prFile[0] // {}).body) // null)} + + (if (((($prFile[0] // {}).html_url) // (($prFile[0] // {}).url)) == null) then {} else {url: (((($prFile[0] // {}).html_url) // (($prFile[0] // {}).url)))} end) + + (if (((($prFile[0] // {}).base) // {}).ref) == null then {} else {baseBranch: (((($prFile[0] // {}).base) // {}).ref)} end) + + (if (((($prFile[0] // {}).head) // {}).ref) == null then {} else {headBranch: (((($prFile[0] // {}).head) // {}).ref)} end) ) } else @@ -150,7 +157,7 @@ EOF --arg model "${QA_MODEL:-openai/gpt-5.4-mini}" \ --arg buildId "${BUILD_ID:-}" \ --arg workflowUrl "${WORKFLOW_URL:-}" \ - --argjson pr "${PR_JSON:-{}}" \ + --slurpfile prFile "${PR_JSON_PATH}" \ --arg summary "${FALLBACK_SUMMARY}" \ '{ generatedAt: (now | todateiso8601), @@ -159,7 +166,7 @@ EOF workflowUrl: $workflowUrl, platform: $platform, platformLabel: $platformLabel, - prNumber: ($pr.number // 0), + prNumber: ((($prFile[0] // {}).number) // 0), screenshots: [], agentDeviceTrace: [], overallStatus: "blocked", From 3d465fbae7f7e18d9f1fd31c09236b8009b28656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 19:07:19 +0200 Subject: [PATCH 04/17] fix: avoid device selectors in cali mobile-pr context --- .eas/workflows/agent-qa-mobile.yml | 7 +++++-- scripts/agent-qa/provision-ios-simulator.sh | 9 ++++++++ scripts/agent-qa/run-and-export.sh | 23 +++++---------------- 3 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 scripts/agent-qa/provision-ios-simulator.sh diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 8cca125..aab5c14 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 cali@0.4.0-0 + npm install -g agent-device@latest cali@0.4.0-0 - uses: eas/download_build id: download_build with: @@ -146,12 +146,15 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@0.10.1 cali@0.4.0-0 + npm install -g agent-device@latest cali@0.4.0-0 - uses: eas/download_build id: download_build with: build_id: ${{ env.BUILD_ID }} extensions: [app] + - id: provision_ios_simulator + run: | + bash ./scripts/agent-qa/provision-ios-simulator.sh - id: run_agent_qa run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" diff --git a/scripts/agent-qa/provision-ios-simulator.sh b/scripts/agent-qa/provision-ios-simulator.sh new file mode 100644 index 0000000..0225e44 --- /dev/null +++ b/scripts/agent-qa/provision-ios-simulator.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +DEVICE_NAME="${AGENT_DEVICE_IOS_DEVICE:?AGENT_DEVICE_IOS_DEVICE is required}" +export AGENT_DEVICE_DAEMON_TIMEOUT_MS="${AGENT_DEVICE_DAEMON_TIMEOUT_MS:-180000}" +export AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS="${AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS:-180000}" + +agent-device ensure-simulator --platform ios --device "${DEVICE_NAME}" --boot diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index d047a1d..851d242 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -25,15 +25,6 @@ case "${QA_PLATFORM_VALUE}" in esac export APP_PATH="${APP_PATH_ARG}" -DEVICE_NAME_VALUE="${DEVICE_NAME:-}" -if [ -z "${DEVICE_NAME_VALUE}" ]; then - if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then - DEVICE_NAME_VALUE="${AGENT_DEVICE_IOS_DEVICE:-}" - else - DEVICE_NAME_VALUE="${AGENT_DEVICE_ANDROID_DEVICE:-}" - fi -fi - if printf '%s' "${PR_JSON:-}" | jq -c . > "${PR_JSON_PATH}" 2>/dev/null; then : else @@ -45,7 +36,6 @@ jq -n \ --arg platform "${QA_PLATFORM_VALUE}" \ --arg artifactPath "${APP_PATH_ARG}" \ --arg appId "${APPLICATION_ID_VALUE}" \ - --arg deviceName "${DEVICE_NAME_VALUE}" \ --arg buildId "${BUILD_ID:-}" \ --arg workflowUrl "${WORKFLOW_URL:-}" \ --arg outputDir "${OUTPUT_DIR}" \ @@ -54,14 +44,11 @@ jq -n \ ' { workspaceRoot: $workspaceRoot, - mobile: ( - { - platform: $platform, - artifactPath: $artifactPath, - appId: $appId - } - + (if $deviceName == "" then {} else {deviceName: $deviceName} end) - ), + mobile: { + platform: $platform, + artifactPath: $artifactPath, + appId: $appId + }, output: { outputDir: $outputDir, screenshotsDir: $screenshotsDir From 45ac45deaf0860cc68eac5b7c2dc506663d1e427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 19:24:49 +0200 Subject: [PATCH 05/17] fix: upgrade cali qa workflow to 0.4.0-1 --- .eas/workflows/agent-qa-mobile.yml | 7 ++---- scripts/agent-qa/provision-ios-simulator.sh | 9 -------- scripts/agent-qa/run-and-export.sh | 24 +++++++++++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) delete mode 100644 scripts/agent-qa/provision-ios-simulator.sh diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index aab5c14..aefc9dc 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-0 + npm install -g agent-device@latest cali@0.4.0-1 - uses: eas/download_build id: download_build with: @@ -146,15 +146,12 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-0 + npm install -g agent-device@latest cali@0.4.0-1 - uses: eas/download_build id: download_build with: build_id: ${{ env.BUILD_ID }} extensions: [app] - - id: provision_ios_simulator - run: | - bash ./scripts/agent-qa/provision-ios-simulator.sh - id: run_agent_qa run: | bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" diff --git a/scripts/agent-qa/provision-ios-simulator.sh b/scripts/agent-qa/provision-ios-simulator.sh deleted file mode 100644 index 0225e44..0000000 --- a/scripts/agent-qa/provision-ios-simulator.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -DEVICE_NAME="${AGENT_DEVICE_IOS_DEVICE:?AGENT_DEVICE_IOS_DEVICE is required}" -export AGENT_DEVICE_DAEMON_TIMEOUT_MS="${AGENT_DEVICE_DAEMON_TIMEOUT_MS:-180000}" -export AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS="${AGENT_DEVICE_IOS_BOOT_TIMEOUT_MS:-180000}" - -agent-device ensure-simulator --platform ios --device "${DEVICE_NAME}" --boot diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index 851d242..c19d08c 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -25,6 +25,14 @@ case "${QA_PLATFORM_VALUE}" in esac export APP_PATH="${APP_PATH_ARG}" +DEVICE_NAME_VALUE="${DEVICE_NAME:-}" +if [ -z "${DEVICE_NAME_VALUE}" ]; then + if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then + DEVICE_NAME_VALUE="${AGENT_DEVICE_IOS_DEVICE:-}" + else + DEVICE_NAME_VALUE="${AGENT_DEVICE_ANDROID_DEVICE:-}" + fi +fi if printf '%s' "${PR_JSON:-}" | jq -c . > "${PR_JSON_PATH}" 2>/dev/null; then : else @@ -36,6 +44,7 @@ jq -n \ --arg platform "${QA_PLATFORM_VALUE}" \ --arg artifactPath "${APP_PATH_ARG}" \ --arg appId "${APPLICATION_ID_VALUE}" \ + --arg deviceName "${DEVICE_NAME_VALUE}" \ --arg buildId "${BUILD_ID:-}" \ --arg workflowUrl "${WORKFLOW_URL:-}" \ --arg outputDir "${OUTPUT_DIR}" \ @@ -44,11 +53,14 @@ jq -n \ ' { workspaceRoot: $workspaceRoot, - mobile: { - platform: $platform, - artifactPath: $artifactPath, - appId: $appId - }, + mobile: ( + { + platform: $platform, + artifactPath: $artifactPath, + appId: $appId + } + + (if $deviceName == "" then {} else {deviceName: $deviceName} end) + ), output: { outputDir: $outputDir, screenshotsDir: $screenshotsDir @@ -90,7 +102,7 @@ jq -n \ ' > "${CONTEXT_PATH}" set +e -npm run agent-qa -- --context "${CONTEXT_PATH}" +cali qa --env mobile-pr --quiet --context "${CONTEXT_PATH}" EXIT_CODE=$? STATUS="$(cat artifacts/qa/status.txt 2>/dev/null || printf blocked)" From 74a471302d2b56aafd68566febf55908189fff7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 19:51:36 +0200 Subject: [PATCH 06/17] fix: configure cali to load bundled agent-device skills --- README.md | 2 ++ cali.config.json | 3 +++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index cf7f4f0..f8f4137 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ Optional environment variables for the QA job: - `QA_MODEL`: Override the default model (`openai/gpt-5.4-mini`) - `BLOB_READ_WRITE_TOKEN`: Upload screenshots to Vercel Blob and include public links in the PR comment +The repo-level [cali.config.json](./cali.config.json) points Cali at the bundled [`agent-device`](https://www.npmjs.com/package/agent-device) skills under `./node_modules/agent-device/skills`, so CI does not need a separate `~/.agents/skills` setup. + ## Local smoke test ```bash diff --git a/cali.config.json b/cali.config.json index e9f2afb..dbea13b 100644 --- a/cali.config.json +++ b/cali.config.json @@ -1,4 +1,7 @@ { + "skillPaths": [ + "./node_modules/agent-device/skills" + ], "commands": { "qa": { "extraInstructions": [ From d9bc00e987350f9a8a889f49f46d4a55ac65345a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 20:04:07 +0200 Subject: [PATCH 07/17] fix: install agent-device skill for codex in ci --- .eas/workflows/agent-qa-mobile.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index aefc9dc..a8329ac 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -107,6 +107,9 @@ jobs: - id: install_agent_device run: | npm install -g agent-device@latest cali@0.4.0-1 + - id: install_agent_device_skill + run: | + npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y - uses: eas/download_build id: download_build with: @@ -147,6 +150,9 @@ jobs: - id: install_agent_device run: | npm install -g agent-device@latest cali@0.4.0-1 + - id: install_agent_device_skill + run: | + npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y - uses: eas/download_build id: download_build with: From 8e04d7784e5ef87086c159f985926eee8377564a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 20:48:50 +0200 Subject: [PATCH 08/17] fix: redact repository clone url from qa context --- scripts/agent-qa/run-and-export.sh | 31 +++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index c19d08c..04e2ea2 100644 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -51,8 +51,21 @@ jq -n \ --arg screenshotsDir "${SCREENSHOTS_DIR}" \ --slurpfile prFile "${PR_JSON_PATH}" \ ' + def pr: ($prFile[0] // {}); + { workspaceRoot: $workspaceRoot, + repository: ( + { + cloneUrl: "" + } + + (if (((pr.base // {}).repo // {}).owner // {}).login == null then {} else {owner: ((((pr.base // {}).repo // {}).owner // {}).login)} end) + + (if (((pr.base // {}).repo // {}).name) == null then {} else {name: (((pr.base // {}).repo // {}).name)} end) + + (if (((((pr.base // {}).repo // {}).html_url) // "") | startswith("https://github.com/")) then {provider: "github.com"} else {} end) + + (if (((pr.base // {}).ref) == null then {} else {defaultBranch: ((pr.base // {}).ref)} end)) + + (if (((pr.head // {}).ref) == null then {} else {currentBranch: ((pr.head // {}).ref)} end)) + + (if (((pr.head // {}).sha) == null then {} else {commitSha: ((pr.head // {}).sha)} end)) + ), mobile: ( { platform: $platform, @@ -67,19 +80,19 @@ jq -n \ } } + ( - if (($prFile[0] // {}) | type) == "object" and (((($prFile[0] // {}) | keys) | length) > 0) then + if (pr | type) == "object" and (((pr | keys) | length) > 0) then { pullRequest: ( { - labels: (((($prFile[0] // {}).labels) // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), - isDraft: ((($prFile[0] // {}).draft) // false) + labels: (((pr.labels) // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), + isDraft: ((pr.draft) // false) } - + (if (($prFile[0] // {}).number) == null then {} else {number: (($prFile[0] // {}).number)} end) - + (if (($prFile[0] // {}).title) == null then {} else {title: (($prFile[0] // {}).title)} end) - + {body: ((($prFile[0] // {}).body) // null)} - + (if (((($prFile[0] // {}).html_url) // (($prFile[0] // {}).url)) == null) then {} else {url: (((($prFile[0] // {}).html_url) // (($prFile[0] // {}).url)))} end) - + (if (((($prFile[0] // {}).base) // {}).ref) == null then {} else {baseBranch: (((($prFile[0] // {}).base) // {}).ref)} end) - + (if (((($prFile[0] // {}).head) // {}).ref) == null then {} else {headBranch: (((($prFile[0] // {}).head) // {}).ref)} end) + + (if (pr.number) == null then {} else {number: (pr.number)} end) + + (if (pr.title) == null then {} else {title: (pr.title)} end) + + {body: ((pr.body) // null)} + + (if (((pr.html_url) // (pr.url)) == null) then {} else {url: (((pr.html_url) // (pr.url)))} end) + + (if (((pr.base) // {}).ref) == null then {} else {baseBranch: (((pr.base) // {}).ref)} end) + + (if (((pr.head) // {}).ref) == null then {} else {headBranch: (((pr.head) // {}).ref)} end) ) } else From d824c44677f72b9e1c172c3ac91e939f7c1fe024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 8 Apr 2026 21:25:10 +0200 Subject: [PATCH 09/17] refactor: use cali ci helpers in qa workflow --- .eas/workflows/agent-qa-mobile.yml | 4 +- scripts/agent-qa/run-and-export.sh | 187 +++++++---------------------- 2 files changed, 45 insertions(+), 146 deletions(-) mode change 100644 => 100755 scripts/agent-qa/run-and-export.sh diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index a8329ac..326abf2 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-1 + npm install -g agent-device@latest cali@0.4.0-2 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -149,7 +149,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-1 + npm install -g agent-device@latest cali@0.4.0-2 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh old mode 100644 new mode 100755 index 04e2ea2..9a23056 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -7,8 +7,7 @@ QA_PLATFORM_VALUE="${QA_PLATFORM:?QA_PLATFORM is required}" APPLICATION_ID_VALUE="${APPLICATION_ID:?APPLICATION_ID is required}" OUTPUT_DIR="artifacts/qa" CONTEXT_PATH="${OUTPUT_DIR}/cali-context.json" -SCREENSHOTS_DIR="${OUTPUT_DIR}/screenshots" -PR_JSON_PATH="${OUTPUT_DIR}/pr.json" +SCREENSHOTS_JSON_PATH="${OUTPUT_DIR}/screenshots.json" mkdir -p "${OUTPUT_DIR}" @@ -25,6 +24,7 @@ case "${QA_PLATFORM_VALUE}" in esac export APP_PATH="${APP_PATH_ARG}" +export CALI_OUTPUT_DIR="${OUTPUT_DIR}" DEVICE_NAME_VALUE="${DEVICE_NAME:-}" if [ -z "${DEVICE_NAME_VALUE}" ]; then if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then @@ -33,92 +33,24 @@ if [ -z "${DEVICE_NAME_VALUE}" ]; then DEVICE_NAME_VALUE="${AGENT_DEVICE_ANDROID_DEVICE:-}" fi fi -if printf '%s' "${PR_JSON:-}" | jq -c . > "${PR_JSON_PATH}" 2>/dev/null; then - : + +set +e +if [ -n "${DEVICE_NAME_VALUE}" ]; then + cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" --device "${DEVICE_NAME_VALUE}" + CONTEXT_EXIT=$? else - printf '{}\n' > "${PR_JSON_PATH}" + cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" + CONTEXT_EXIT=$? fi -jq -n \ - --arg workspaceRoot "${PWD}" \ - --arg platform "${QA_PLATFORM_VALUE}" \ - --arg artifactPath "${APP_PATH_ARG}" \ - --arg appId "${APPLICATION_ID_VALUE}" \ - --arg deviceName "${DEVICE_NAME_VALUE}" \ - --arg buildId "${BUILD_ID:-}" \ - --arg workflowUrl "${WORKFLOW_URL:-}" \ - --arg outputDir "${OUTPUT_DIR}" \ - --arg screenshotsDir "${SCREENSHOTS_DIR}" \ - --slurpfile prFile "${PR_JSON_PATH}" \ - ' - def pr: ($prFile[0] // {}); - - { - workspaceRoot: $workspaceRoot, - repository: ( - { - cloneUrl: "" - } - + (if (((pr.base // {}).repo // {}).owner // {}).login == null then {} else {owner: ((((pr.base // {}).repo // {}).owner // {}).login)} end) - + (if (((pr.base // {}).repo // {}).name) == null then {} else {name: (((pr.base // {}).repo // {}).name)} end) - + (if (((((pr.base // {}).repo // {}).html_url) // "") | startswith("https://github.com/")) then {provider: "github.com"} else {} end) - + (if (((pr.base // {}).ref) == null then {} else {defaultBranch: ((pr.base // {}).ref)} end)) - + (if (((pr.head // {}).ref) == null then {} else {currentBranch: ((pr.head // {}).ref)} end)) - + (if (((pr.head // {}).sha) == null then {} else {commitSha: ((pr.head // {}).sha)} end)) - ), - mobile: ( - { - platform: $platform, - artifactPath: $artifactPath, - appId: $appId - } - + (if $deviceName == "" then {} else {deviceName: $deviceName} end) - ), - output: { - outputDir: $outputDir, - screenshotsDir: $screenshotsDir - } - } - + ( - if (pr | type) == "object" and (((pr | keys) | length) > 0) then - { - pullRequest: ( - { - labels: (((pr.labels) // []) | map(if type == "object" then (.name // empty) else . end) | map(select(. != ""))), - isDraft: ((pr.draft) // false) - } - + (if (pr.number) == null then {} else {number: (pr.number)} end) - + (if (pr.title) == null then {} else {title: (pr.title)} end) - + {body: ((pr.body) // null)} - + (if (((pr.html_url) // (pr.url)) == null) then {} else {url: (((pr.html_url) // (pr.url)))} end) - + (if (((pr.base) // {}).ref) == null then {} else {baseBranch: (((pr.base) // {}).ref)} end) - + (if (((pr.head) // {}).ref) == null then {} else {headBranch: (((pr.head) // {}).ref)} end) - ) - } - else - {} - end - ) - + ( - if $buildId == "" and $workflowUrl == "" then - {} - else - { - build: ( - {} - + (if $buildId == "" then {} else {id: $buildId} end) - + (if $workflowUrl == "" then {} else {workflowUrl: $workflowUrl} end) - ) - } - end - ) - ' > "${CONTEXT_PATH}" - -set +e -cali qa --env mobile-pr --quiet --context "${CONTEXT_PATH}" -EXIT_CODE=$? +if [ "${CONTEXT_EXIT}" -eq 0 ]; then + cali qa --env eas-mobile-pr --quiet --context "${CONTEXT_PATH}" + EXIT_CODE=$? +else + EXIT_CODE="${CONTEXT_EXIT}" +fi -STATUS="$(cat artifacts/qa/status.txt 2>/dev/null || printf blocked)" +STATUS="$(cat "${OUTPUT_DIR}/status.txt" 2>/dev/null || printf blocked)" case "${STATUS}" in passed) STATUS_LABEL="✅ passed" @@ -140,91 +72,58 @@ case "${STATUS}" in ;; esac -if [ -f artifacts/qa/section.md ]; then - SECTION_BODY="$(cat artifacts/qa/section.md)" -else - SECTION_BODY="### ${PLATFORM_LABEL} - -**Status:** ${STATUS_LABEL} - -No ${PLATFORM_LABEL} QA section was produced. -" -fi - -if [ ! -f artifacts/qa/report.json ]; then +if [ ! -f "${OUTPUT_DIR}/report.json" ]; then FALLBACK_SUMMARY="The Cali QA command failed before it could publish a report. Check the run_agent_qa logs above." - cat > artifacts/qa/status.txt < "${OUTPUT_DIR}/status.txt" < artifacts/qa/section.md < "${OUTPUT_DIR}/section.md" < artifacts/qa/report.json +EOF2 + cat > "${OUTPUT_DIR}/top-issue.txt" < "${OUTPUT_DIR}/screenshots.json" </dev/null | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')" +if [ -z "${TOP_ISSUE}" ]; then + if [ "${STATUS}" = "passed" ]; then + TOP_ISSUE="N/A" + else + TOP_ISSUE="No report.json was produced." + fi +fi +if [ -f "${SCREENSHOTS_JSON_PATH}" ]; then SCREENSHOTS_CELL="$( jq -r ' - if (.screenshots | length) == 0 then + if ((.screenshots // []) | length) == 0 then "N/A" else [ - .screenshots[] + (.screenshots // [])[] | if .blobUrl then - "**\((.label // .fileName))**
\"\((.label" + "**\((.label // .fileName // \"Screenshot\"))**
\"\((.label" else - "**\((.label // .fileName))**
\(.fileName) (\(.bytes) bytes)" + "**\((.label // .fileName // \"Screenshot\"))**
\(.fileName // \"screenshot\")" end ] | join("

") end - ' artifacts/qa/report.json + ' "${SCREENSHOTS_JSON_PATH}" )" else - if [ "${STATUS}" = "passed" ]; then - TOP_ISSUE="N/A" - else - TOP_ISSUE="No report.json was produced." - fi SCREENSHOTS_CELL="N/A" fi +SECTION_BODY="$(cat "${OUTPUT_DIR}/section.md" 2>/dev/null || printf '### %s\n\n**Status:** %s\n\nNo %s QA section was produced.\n' "${PLATFORM_LABEL}" "${STATUS_LABEL}" "${PLATFORM_LABEL}")" + set-output status "$STATUS" set-output status_label "$STATUS_LABEL" set-output top_issue "$TOP_ISSUE" From 7721ac3bf4232545515ed2dde7b05176a05d4d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 9 Apr 2026 15:31:37 +0200 Subject: [PATCH 10/17] chore: bump cali to 0.4.0-3 --- .eas/workflows/agent-qa-mobile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 326abf2..15e78f4 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -106,7 +106,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-2 + npm install -g agent-device@latest cali@0.4.0-3 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -149,7 +149,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-2 + npm install -g agent-device@latest cali@0.4.0-3 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y From cfd69220cc31478d6e351b9d69d4e5687105b1fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 10 Apr 2026 10:11:12 +0200 Subject: [PATCH 11/17] fix: repair screenshot output rendering --- scripts/agent-qa/run-and-export.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh index 9a23056..4b57b2b 100755 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -110,9 +110,9 @@ if [ -f "${SCREENSHOTS_JSON_PATH}" ]; then [ (.screenshots // [])[] | if .blobUrl then - "**\((.label // .fileName // \"Screenshot\"))**
\"\((.label" + "**\((.label // .fileName // "Screenshot"))**
\" else - "**\((.label // .fileName // \"Screenshot\"))**
\(.fileName // \"screenshot\")" + "**\((.label // .fileName // "Screenshot"))**
\(.fileName // "screenshot")" end ] | join("

") end From 200355bf2fd67d6007371aab8e5847982e19d098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 10 Apr 2026 13:21:17 +0200 Subject: [PATCH 12/17] refactor: use cali ci export flow --- .eas/workflows/agent-qa-mobile.yml | 89 +++++++++-------- scripts/agent-qa/run-and-export.sh | 148 +++++++++-------------------- 2 files changed, 92 insertions(+), 145 deletions(-) mode change 100755 => 100644 scripts/agent-qa/run-and-export.sh diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 15e78f4..04db4cf 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -88,11 +88,7 @@ jobs: environment: preview runs_on: linux-medium-nested-virtualization outputs: - status: ${{ steps.run_agent_qa.outputs.status }} - status_label: ${{ steps.run_agent_qa.outputs.status_label }} - top_issue: ${{ steps.run_agent_qa.outputs.top_issue }} - screenshots_cell: ${{ steps.run_agent_qa.outputs.screenshots_cell }} - section_body: ${{ steps.run_agent_qa.outputs.section_body }} + report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} env: BUILD_ID: ${{ after.android_repack.outputs.build_id || after.android_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -106,7 +102,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-3 + npm install -g agent-device@latest cali@0.4.0-4 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -120,7 +116,10 @@ jobs: bash ./scripts/agent-qa/provision-android-emulator.sh - id: run_agent_qa run: | - bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" + bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true + - id: capture_qa_outputs + run: | + set-output report_json "$(jq -c . artifacts/qa/report.json)" qa_ios: name: iOS agent-device QA @@ -129,11 +128,7 @@ jobs: environment: preview runs_on: macos-medium outputs: - status: ${{ steps.run_agent_qa.outputs.status }} - status_label: ${{ steps.run_agent_qa.outputs.status_label }} - top_issue: ${{ steps.run_agent_qa.outputs.top_issue }} - screenshots_cell: ${{ steps.run_agent_qa.outputs.screenshots_cell }} - section_body: ${{ steps.run_agent_qa.outputs.section_body }} + report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} env: BUILD_ID: ${{ after.ios_repack.outputs.build_id || after.ios_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -149,7 +144,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-3 + npm install -g agent-device@latest cali@0.4.0-4 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -160,36 +155,50 @@ jobs: extensions: [app] - id: run_agent_qa run: | - bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" + bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true + - id: capture_qa_outputs + run: | + set-output report_json "$(jq -c . artifacts/qa/report.json)" + + compose_comment: + name: Compose PR comment + after: [qa_android, qa_ios] + if: ${{ always() }} + environment: preview + runs_on: linux-medium + outputs: + payload: ${{ steps.compose.outputs.payload }} + steps: + - uses: eas/checkout + - id: install_cali + run: | + npm install -g cali@0.4.0-4 + - id: compose + env: + ANDROID_REPORT_JSON: ${{ after.qa_android.outputs.report_json || '' }} + IOS_REPORT_JSON: ${{ after.qa_ios.outputs.report_json || '' }} + run: | + mkdir -p artifacts/android artifacts/ios artifacts/combined-comment + EXPORT_ARGS=() + if [ -n "${ANDROID_REPORT_JSON}" ]; then + printf '%s' "${ANDROID_REPORT_JSON}" | jq . > artifacts/android/report.json + EXPORT_ARGS+=(--android artifacts/android/report.json) + fi + if [ -n "${IOS_REPORT_JSON}" ]; then + printf '%s' "${IOS_REPORT_JSON}" | jq . > artifacts/ios/report.json + EXPORT_ARGS+=(--ios artifacts/ios/report.json) + fi + if [ "${#EXPORT_ARGS[@]}" -eq 0 ]; then + printf 'Agent QA comment was not produced.\n' > artifacts/combined-comment/ci-comment.md + else + cali export-ci "${EXPORT_ARGS[@]}" --output-dir artifacts/combined-comment + fi + set-output payload "$(cat artifacts/combined-comment/ci-comment.md)" qa_comment: name: Comment on PR - after: [qa_android, qa_ios] + after: [compose_comment] if: ${{ always() && github.event_name == 'pull_request' }} type: github-comment params: - payload: | - ## Agent QA - - | Platform | Status | Issues | - | --- | --- | --- | - | Android | ${{ after.qa_android.outputs.status_label || '⛔ blocked' }} | ${{ after.qa_android.outputs.top_issue || 'No Android QA report was produced.' }} | - | iOS | ${{ after.qa_ios.outputs.status_label || '⛔ blocked' }} | ${{ after.qa_ios.outputs.top_issue || 'No iOS QA report was produced.' }} | - - ### Screenshots - - | Android | iOS | - | --- | --- | - | ${{ after.qa_android.outputs.screenshots_cell || 'N/A' }} | ${{ after.qa_ios.outputs.screenshots_cell || 'N/A' }} | - -
- Full Android Report - - ${{ after.qa_android.outputs.section_body || '### Android\n\n**Status:** ⛔ blocked\n\nNo Android QA section was produced.\n' }} -
- -
- Full iOS Report - - ${{ after.qa_ios.outputs.section_body || '### iOS\n\n**Status:** ⛔ blocked\n\nNo iOS QA section was produced.\n' }} -
+ payload: ${{ after.compose_comment.outputs.payload || 'Agent QA comment was not produced.' }} diff --git a/scripts/agent-qa/run-and-export.sh b/scripts/agent-qa/run-and-export.sh old mode 100755 new mode 100644 index 4b57b2b..91422f1 --- a/scripts/agent-qa/run-and-export.sh +++ b/scripts/agent-qa/run-and-export.sh @@ -6,25 +6,9 @@ APP_PATH_ARG="${1:?APP_PATH argument is required}" QA_PLATFORM_VALUE="${QA_PLATFORM:?QA_PLATFORM is required}" APPLICATION_ID_VALUE="${APPLICATION_ID:?APPLICATION_ID is required}" OUTPUT_DIR="artifacts/qa" -CONTEXT_PATH="${OUTPUT_DIR}/cali-context.json" -SCREENSHOTS_JSON_PATH="${OUTPUT_DIR}/screenshots.json" mkdir -p "${OUTPUT_DIR}" -case "${QA_PLATFORM_VALUE}" in - ios) - PLATFORM_LABEL="iOS" - ;; - android) - PLATFORM_LABEL="Android" - ;; - *) - PLATFORM_LABEL="${QA_PLATFORM_VALUE}" - ;; -esac - -export APP_PATH="${APP_PATH_ARG}" -export CALI_OUTPUT_DIR="${OUTPUT_DIR}" DEVICE_NAME_VALUE="${DEVICE_NAME:-}" if [ -z "${DEVICE_NAME_VALUE}" ]; then if [ "${QA_PLATFORM_VALUE}" = "ios" ]; then @@ -34,99 +18,53 @@ if [ -z "${DEVICE_NAME_VALUE}" ]; then fi fi -set +e -if [ -n "${DEVICE_NAME_VALUE}" ]; then - cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" --device "${DEVICE_NAME_VALUE}" - CONTEXT_EXIT=$? -else - cali write-mobile-pr-context --from eas --output "${CONTEXT_PATH}" - CONTEXT_EXIT=$? -fi - -if [ "${CONTEXT_EXIT}" -eq 0 ]; then - cali qa --env eas-mobile-pr --quiet --context "${CONTEXT_PATH}" - EXIT_CODE=$? -else - EXIT_CODE="${CONTEXT_EXIT}" -fi - -STATUS="$(cat "${OUTPUT_DIR}/status.txt" 2>/dev/null || printf blocked)" -case "${STATUS}" in - passed) - STATUS_LABEL="✅ passed" - ;; - failed) - STATUS_LABEL="❌ failed" - ;; - blocked) - STATUS_LABEL="⛔ blocked" - ;; - unsure) - STATUS_LABEL="🤔 unsure" - ;; - not_tested) - STATUS_LABEL="⚪ not_tested" - ;; - *) - STATUS_LABEL="⚪ ${STATUS}" - ;; -esac - -if [ ! -f "${OUTPUT_DIR}/report.json" ]; then - FALLBACK_SUMMARY="The Cali QA command failed before it could publish a report. Check the run_agent_qa logs above." - cat > "${OUTPUT_DIR}/status.txt" < "${OUTPUT_DIR}/section.md" < "${OUTPUT_DIR}/top-issue.txt" < "${OUTPUT_DIR}/screenshots.json" </dev/null | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//')" -if [ -z "${TOP_ISSUE}" ]; then - if [ "${STATUS}" = "passed" ]; then - TOP_ISSUE="N/A" - else - TOP_ISSUE="No report.json was produced." - fi -fi +set +e +cali "${QA_ARGS[@]}" +QA_EXIT_CODE=$? +set -e -if [ -f "${SCREENSHOTS_JSON_PATH}" ]; then - SCREENSHOTS_CELL="$( - jq -r ' - if ((.screenshots // []) | length) == 0 then - "N/A" - else - [ - (.screenshots // [])[] - | if .blobUrl then - "**\((.label // .fileName // "Screenshot"))**
\" - else - "**\((.label // .fileName // "Screenshot"))**
\(.fileName // "screenshot")" - end - ] | join("

") - end - ' "${SCREENSHOTS_JSON_PATH}" - )" -else - SCREENSHOTS_CELL="N/A" +if [ ! -f "${OUTPUT_DIR}/report.json" ]; then + cat > "${OUTPUT_DIR}/report.json" </dev/null || printf '### %s\n\n**Status:** %s\n\nNo %s QA section was produced.\n' "${PLATFORM_LABEL}" "${STATUS_LABEL}" "${PLATFORM_LABEL}")" - -set-output status "$STATUS" -set-output status_label "$STATUS_LABEL" -set-output top_issue "$TOP_ISSUE" -set-output screenshots_cell "$SCREENSHOTS_CELL" -set-output section_body "$SECTION_BODY" -exit $EXIT_CODE +cali export-ci --report "${OUTPUT_DIR}/report.json" +exit "${QA_EXIT_CODE}" From cb37ab1c906439392e28b68a41d9c8791198df76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 10 Apr 2026 13:30:59 +0200 Subject: [PATCH 13/17] docs: clarify cali qa env usage --- README.md | 12 ++++++------ package.json | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f8f4137..7360867 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Optional environment variables for the QA job: - `QA_MODEL`: Override the default model (`openai/gpt-5.4-mini`) - `BLOB_READ_WRITE_TOKEN`: Upload screenshots to Vercel Blob and include public links in the PR comment -The repo-level [cali.config.json](./cali.config.json) points Cali at the bundled [`agent-device`](https://www.npmjs.com/package/agent-device) skills under `./node_modules/agent-device/skills`, so CI does not need a separate `~/.agents/skills` setup. +The workflow installs the [`agent-device`](https://www.npmjs.com/package/agent-device) skill explicitly in CI with `npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y`, so Cali can discover it from the standard `.agents/skills` location. ## Local smoke test @@ -44,7 +44,9 @@ npm install npx cali qa --help ``` -The workflow runner writes `section.md`, `status.txt`, `report.json`, and `cali-context.json` to `artifacts/qa/` during execution. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. +Local runs use the `local-android` and `local-ios` Cali envs. The EAS workflow uses `cali qa --ci eas ...`, not `--env mobile-pr`. + +The workflow runner writes `report.json`, `section.md`, `status.txt`, and CI export files like `ci-comment.md` and `ci-output.json` to `artifacts/qa/`. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. To execute the QA command directly, provide the same inputs that the workflow uses: @@ -52,8 +54,7 @@ Android: ```bash AI_GATEWAY_API_KEY=... \ -./node_modules/.bin/cali qa \ - --env local-android \ +npm run agent-qa:android -- \ --artifact /absolute/path/to/app.apk \ --app-id dev.expo.easagentdevice \ --device ci-android \ @@ -64,8 +65,7 @@ iOS simulator: ```bash AI_GATEWAY_API_KEY=... \ -./node_modules/.bin/cali qa \ - --env local-ios \ +npm run agent-qa:ios -- \ --artifact /absolute/path/to/MyApp.app \ --app-id dev.expo.easagentdevice \ --device "iPhone 17" \ diff --git a/package.json b/package.json index 064e622..faf88cd 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,8 @@ "ios": "expo run:ios", "web": "expo start --web", "lint": "expo lint", - "agent-qa": "cali qa --env mobile-pr --quiet" + "agent-qa:android": "cali qa --env local-android --quiet", + "agent-qa:ios": "cali qa --env local-ios --quiet" }, "dependencies": { "@expo/vector-icons": "^15.0.3", @@ -18,6 +19,7 @@ "@react-navigation/native": "^7.1.8", "expo": "^55.0.8", "expo-constants": "~55.0.9", + "expo-dev-client": "~55.0.23", "expo-font": "~55.0.4", "expo-haptics": "~55.0.9", "expo-image": "~55.0.6", @@ -40,7 +42,7 @@ }, "devDependencies": { "@types/react": "~19.2.10", - "cali": "0.4.0-0", + "cali": "0.4.0-1", "eslint": "^9.25.0", "eslint-config-expo": "~55.0.0", "typescript": "~5.9.2" From 0d933f16fc380fa6419d778defcdb3b27f7f5658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 10 Apr 2026 13:32:50 +0200 Subject: [PATCH 14/17] docs: clarify local device selection --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 7360867..535ce77 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Local runs use the `local-android` and `local-ios` Cali envs. The EAS workflow u The workflow runner writes `report.json`, `section.md`, `status.txt`, and CI export files like `ci-comment.md` and `ci-output.json` to `artifacts/qa/`. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. -To execute the QA command directly, provide the same inputs that the workflow uses: +To execute the QA command directly, provide the same core inputs that the workflow uses. `--device` is optional locally; pass it only when you want to target a specific simulator or emulator. Android: @@ -57,7 +57,6 @@ AI_GATEWAY_API_KEY=... \ npm run agent-qa:android -- \ --artifact /absolute/path/to/app.apk \ --app-id dev.expo.easagentdevice \ - --device ci-android \ --prompt "verify the updated welcome title" ``` @@ -68,6 +67,5 @@ AI_GATEWAY_API_KEY=... \ npm run agent-qa:ios -- \ --artifact /absolute/path/to/MyApp.app \ --app-id dev.expo.easagentdevice \ - --device "iPhone 17" \ --prompt "verify the updated welcome title" ``` From 6d94f09daf71638e23e94abff217ab90b84c6695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 10 Apr 2026 13:33:44 +0200 Subject: [PATCH 15/17] docs: focus repo on ci qa flow --- README.md | 41 ++++++++++++----------------------------- package.json | 4 +--- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 535ce77..b0116b2 100644 --- a/README.md +++ b/README.md @@ -37,35 +37,18 @@ Optional environment variables for the QA job: The workflow installs the [`agent-device`](https://www.npmjs.com/package/agent-device) skill explicitly in CI with `npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y`, so Cali can discover it from the standard `.agents/skills` location. -## Local smoke test +## CI flow -```bash -npm install -npx cali qa --help -``` +The workflow uses `cali qa --ci eas ...` for each platform and then `cali export-ci` to produce: -Local runs use the `local-android` and `local-ios` Cali envs. The EAS workflow uses `cali qa --ci eas ...`, not `--env mobile-pr`. +- `artifacts/qa/report.json` +- `artifacts/qa/section.md` +- `artifacts/qa/status.txt` +- `artifacts/qa/summary.txt` +- `artifacts/qa/top-issue.txt` +- `artifacts/qa/screenshots.md` +- `artifacts/qa/screenshots.json` +- `artifacts/qa/ci-comment.md` +- `artifacts/qa/ci-output.json` -The workflow runner writes `report.json`, `section.md`, `status.txt`, and CI export files like `ci-comment.md` and `ci-output.json` to `artifacts/qa/`. Screenshots are written to `artifacts/qa/screenshots` and uploaded to Vercel Blob when configured. - -To execute the QA command directly, provide the same core inputs that the workflow uses. `--device` is optional locally; pass it only when you want to target a specific simulator or emulator. - -Android: - -```bash -AI_GATEWAY_API_KEY=... \ -npm run agent-qa:android -- \ - --artifact /absolute/path/to/app.apk \ - --app-id dev.expo.easagentdevice \ - --prompt "verify the updated welcome title" -``` - -iOS simulator: - -```bash -AI_GATEWAY_API_KEY=... \ -npm run agent-qa:ios -- \ - --artifact /absolute/path/to/MyApp.app \ - --app-id dev.expo.easagentdevice \ - --prompt "verify the updated welcome title" -``` +Android and iOS reports are then combined into one PR comment in the final workflow step. diff --git a/package.json b/package.json index faf88cd..72c5982 100644 --- a/package.json +++ b/package.json @@ -8,9 +8,7 @@ "android": "expo run:android", "ios": "expo run:ios", "web": "expo start --web", - "lint": "expo lint", - "agent-qa:android": "cali qa --env local-android --quiet", - "agent-qa:ios": "cali qa --env local-ios --quiet" + "lint": "expo lint" }, "dependencies": { "@expo/vector-icons": "^15.0.3", From 0404c52707f27477cb855dea087e1056c1bd0c8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Sun, 12 Apr 2026 12:23:54 +0200 Subject: [PATCH 16/17] fix: harden qa report handoff --- .eas/workflows/agent-qa-mobile.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 04db4cf..00aba19 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -88,7 +88,7 @@ jobs: environment: preview runs_on: linux-medium-nested-virtualization outputs: - report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} + report_b64: ${{ steps.capture_qa_outputs.outputs.report_b64 }} env: BUILD_ID: ${{ after.android_repack.outputs.build_id || after.android_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -102,7 +102,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-4 + npm install -g agent-device@latest cali@0.4.0-5 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -119,7 +119,7 @@ jobs: bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true - id: capture_qa_outputs run: | - set-output report_json "$(jq -c . artifacts/qa/report.json)" + set-output report_b64 "$(base64 < artifacts/qa/report.json | tr -d '\n')" qa_ios: name: iOS agent-device QA @@ -128,7 +128,7 @@ jobs: environment: preview runs_on: macos-medium outputs: - report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} + report_b64: ${{ steps.capture_qa_outputs.outputs.report_b64 }} env: BUILD_ID: ${{ after.ios_repack.outputs.build_id || after.ios_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -144,7 +144,7 @@ jobs: - uses: eas/install_node_modules - id: install_agent_device run: | - npm install -g agent-device@latest cali@0.4.0-4 + npm install -g agent-device@latest cali@0.4.0-5 - id: install_agent_device_skill run: | npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y @@ -158,7 +158,7 @@ jobs: bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true - id: capture_qa_outputs run: | - set-output report_json "$(jq -c . artifacts/qa/report.json)" + set-output report_b64 "$(base64 < artifacts/qa/report.json | tr -d '\n')" compose_comment: name: Compose PR comment @@ -172,20 +172,20 @@ jobs: - uses: eas/checkout - id: install_cali run: | - npm install -g cali@0.4.0-4 + npm install -g cali@0.4.0-5 - id: compose env: - ANDROID_REPORT_JSON: ${{ after.qa_android.outputs.report_json || '' }} - IOS_REPORT_JSON: ${{ after.qa_ios.outputs.report_json || '' }} + ANDROID_REPORT_B64: ${{ after.qa_android.outputs.report_b64 || '' }} + IOS_REPORT_B64: ${{ after.qa_ios.outputs.report_b64 || '' }} run: | mkdir -p artifacts/android artifacts/ios artifacts/combined-comment EXPORT_ARGS=() - if [ -n "${ANDROID_REPORT_JSON}" ]; then - printf '%s' "${ANDROID_REPORT_JSON}" | jq . > artifacts/android/report.json + if [ -n "${ANDROID_REPORT_B64}" ]; then + printf '%s' "${ANDROID_REPORT_B64}" | base64 -d > artifacts/android/report.json EXPORT_ARGS+=(--android artifacts/android/report.json) fi - if [ -n "${IOS_REPORT_JSON}" ]; then - printf '%s' "${IOS_REPORT_JSON}" | jq . > artifacts/ios/report.json + if [ -n "${IOS_REPORT_B64}" ]; then + printf '%s' "${IOS_REPORT_B64}" | base64 -d > artifacts/ios/report.json EXPORT_ARGS+=(--ios artifacts/ios/report.json) fi if [ "${#EXPORT_ARGS[@]}" -eq 0 ]; then From 0f766def81fdf397f4c8fb4e2b42b2b66b14942e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Tue, 14 Apr 2026 11:02:52 +0200 Subject: [PATCH 17/17] fix: pass qa reports as json --- .eas/workflows/agent-qa-mobile.yml | 53 +++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml index 00aba19..6c08f35 100644 --- a/.eas/workflows/agent-qa-mobile.yml +++ b/.eas/workflows/agent-qa-mobile.yml @@ -88,7 +88,7 @@ jobs: environment: preview runs_on: linux-medium-nested-virtualization outputs: - report_b64: ${{ steps.capture_qa_outputs.outputs.report_b64 }} + report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} env: BUILD_ID: ${{ after.android_repack.outputs.build_id || after.android_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -119,7 +119,7 @@ jobs: bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true - id: capture_qa_outputs run: | - set-output report_b64 "$(base64 < artifacts/qa/report.json | tr -d '\n')" + set-output report_json "$(node -e 'const fs = require("node:fs"); const report = JSON.stringify(JSON.parse(fs.readFileSync("artifacts/qa/report.json", "utf8"))); process.stdout.write(JSON.stringify(report))')" qa_ios: name: iOS agent-device QA @@ -128,7 +128,7 @@ jobs: environment: preview runs_on: macos-medium outputs: - report_b64: ${{ steps.capture_qa_outputs.outputs.report_b64 }} + report_json: ${{ steps.capture_qa_outputs.outputs.report_json }} env: BUILD_ID: ${{ after.ios_repack.outputs.build_id || after.ios_build.outputs.build_id }} PR_JSON: ${{ toJSON(github.event.pull_request) }} @@ -158,7 +158,7 @@ jobs: bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true - id: capture_qa_outputs run: | - set-output report_b64 "$(base64 < artifacts/qa/report.json | tr -d '\n')" + set-output report_json "$(node -e 'const fs = require("node:fs"); const report = JSON.stringify(JSON.parse(fs.readFileSync("artifacts/qa/report.json", "utf8"))); process.stdout.write(JSON.stringify(report))')" compose_comment: name: Compose PR comment @@ -175,18 +175,47 @@ jobs: npm install -g cali@0.4.0-5 - id: compose env: - ANDROID_REPORT_B64: ${{ after.qa_android.outputs.report_b64 || '' }} - IOS_REPORT_B64: ${{ after.qa_ios.outputs.report_b64 || '' }} + ANDROID_REPORT_JSON: ${{ after.qa_android.outputs.report_json || '' }} + IOS_REPORT_JSON: ${{ after.qa_ios.outputs.report_json || '' }} run: | mkdir -p artifacts/android artifacts/ios artifacts/combined-comment + decode_report() { + local platform="$1" + local value="$2" + local output_path="$3" + + REPORT_JSON="${value}" REPORT_OUTPUT_PATH="${output_path}" node <<'NODE' + const fs = require('node:fs'); + + const value = process.env.REPORT_JSON || ''; + const outputPath = process.env.REPORT_OUTPUT_PATH; + + try { + const parsed = JSON.parse(value); + const json = typeof parsed === 'string' ? parsed : JSON.stringify(parsed); + JSON.parse(json); + fs.writeFileSync(outputPath, json); + } catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); + } + NODE + } + EXPORT_ARGS=() - if [ -n "${ANDROID_REPORT_B64}" ]; then - printf '%s' "${ANDROID_REPORT_B64}" | base64 -d > artifacts/android/report.json - EXPORT_ARGS+=(--android artifacts/android/report.json) + if [ -n "${ANDROID_REPORT_JSON}" ]; then + if decode_report android "${ANDROID_REPORT_JSON}" artifacts/android/report.json; then + EXPORT_ARGS+=(--android artifacts/android/report.json) + else + echo "Skipping Android report: failed to parse report JSON output." + fi fi - if [ -n "${IOS_REPORT_B64}" ]; then - printf '%s' "${IOS_REPORT_B64}" | base64 -d > artifacts/ios/report.json - EXPORT_ARGS+=(--ios artifacts/ios/report.json) + if [ -n "${IOS_REPORT_JSON}" ]; then + if decode_report ios "${IOS_REPORT_JSON}" artifacts/ios/report.json; then + EXPORT_ARGS+=(--ios artifacts/ios/report.json) + else + echo "Skipping iOS report: failed to parse report JSON output." + fi fi if [ "${#EXPORT_ARGS[@]}" -eq 0 ]; then printf 'Agent QA comment was not produced.\n' > artifacts/combined-comment/ci-comment.md