callstackincubator · thymikee · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/.eas/workflows/agent-qa-mobile.yml b/.eas/workflows/agent-qa-mobile.yml
@@ -88,11 +88,7 @@ jobs:
     environment: preview
     runs_on: linux-medium-nested-virtualization
     outputs:
-      status: ${{ steps.run_agent_qa.outputs.status }}
-      status_label: ${{ steps.run_agent_qa.outputs.status_label }}
-      top_issue: ${{ steps.run_agent_qa.outputs.top_issue }}
-      screenshots_cell: ${{ steps.run_agent_qa.outputs.screenshots_cell }}
-      section_body: ${{ steps.run_agent_qa.outputs.section_body }}
+      report_json: ${{ steps.capture_qa_outputs.outputs.report_json }}  # sole job output; set by the capture_qa_outputs step
     env:
       BUILD_ID: ${{ after.android_repack.outputs.build_id || after.android_build.outputs.build_id }}
       PR_JSON: ${{ toJSON(github.event.pull_request) }}
@@ -106,7 +102,10 @@ jobs:
       - uses: eas/install_node_modules
       - id: install_agent_device
         run: |
-          npm install -g agent-device@0.10.1
+          npm install -g agent-device@latest cali@0.4.0-5  # NOTE(review): agent-device floats on @latest while cali is pinned; the replaced line pinned 0.10.1 - consider pinning again for reproducible runs
+      - id: install_agent_device_skill  # adds the agent-device skill from callstackincubator/agent-device via the skills CLI
+        run: |
+          npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y
       - uses: eas/download_build
         id: download_build
         with:
@@ -116,12 +115,11 @@ jobs:
         run: |
           bash ./scripts/agent-qa/provision-android-emulator.sh
       - id: run_agent_qa
-        env:
-          AGENT_DEVICE_SESSION: qa-android
-          AGENT_DEVICE_PLATFORM: android
-          AGENT_DEVICE_SESSION_LOCK: strip
         run: |
-          bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}"
+          bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true  # "|| true" forces a zero exit status, so a failing QA script does not fail this step
+      - id: capture_qa_outputs  # reads artifacts/qa/report.json and publishes it as the report_json output
+        run: |
+          set-output report_json "$(node -e 'const fs = require("node:fs"); const report = JSON.stringify(JSON.parse(fs.readFileSync("artifacts/qa/report.json", "utf8"))); process.stdout.write(JSON.stringify(report))')"  # stringified twice: the output is one JSON string literal wrapping the compact report
 
   qa_ios:
     name: iOS agent-device QA
@@ -130,11 +128,7 @@ jobs:
     environment: preview
     runs_on: macos-medium
     outputs:
-      status: ${{ steps.run_agent_qa.outputs.status }}
-      status_label: ${{ steps.run_agent_qa.outputs.status_label }}
-      top_issue: ${{ steps.run_agent_qa.outputs.top_issue }}
-      screenshots_cell: ${{ steps.run_agent_qa.outputs.screenshots_cell }}
-      section_body: ${{ steps.run_agent_qa.outputs.section_body }}
+      report_json: ${{ steps.capture_qa_outputs.outputs.report_json }}  # sole job output; set by the capture_qa_outputs step
     env:
       BUILD_ID: ${{ after.ios_repack.outputs.build_id || after.ios_build.outputs.build_id }}
       PR_JSON: ${{ toJSON(github.event.pull_request) }}
@@ -150,51 +144,90 @@ jobs:
       - uses: eas/install_node_modules
       - id: install_agent_device
         run: |
-          npm install -g agent-device@0.10.1
+          npm install -g agent-device@latest cali@0.4.0-5  # NOTE(review): agent-device floats on @latest while cali is pinned; the replaced line pinned 0.10.1 - consider pinning again for reproducible runs
+      - id: install_agent_device_skill  # adds the agent-device skill from callstackincubator/agent-device via the skills CLI
+        run: |
+          npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y
       - uses: eas/download_build
         id: download_build
         with:
           build_id: ${{ env.BUILD_ID }}
           extensions: [app]
-      - id: provision_ios_simulator
-        run: |
-          bash ./scripts/agent-qa/provision-ios-simulator.sh
       - id: run_agent_qa
+        run: |
+          bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}" || true  # "|| true" forces a zero exit status, so a failing QA script does not fail this step
+      - id: capture_qa_outputs  # reads artifacts/qa/report.json and publishes it as the report_json output
+        run: |
+          set-output report_json "$(node -e 'const fs = require("node:fs"); const report = JSON.stringify(JSON.parse(fs.readFileSync("artifacts/qa/report.json", "utf8"))); process.stdout.write(JSON.stringify(report))')"  # stringified twice: the output is one JSON string literal wrapping the compact report
+
+  compose_comment:  # builds one PR comment payload from the Android and iOS report_json outputs
+    name: Compose PR comment
+    after: [qa_android, qa_ios]
+    if: ${{ always() }}  # intended to run even when a QA job did not succeed
+    environment: preview
+    runs_on: linux-medium
+    outputs:
+      payload: ${{ steps.compose.outputs.payload }}  # comment text consumed by the qa_comment job
+    steps:
+      - uses: eas/checkout
+      - id: install_cali
+        run: |
+          npm install -g cali@0.4.0-5
+      - id: compose
         env:
-          AGENT_DEVICE_SESSION: qa-ios
-          AGENT_DEVICE_PLATFORM: ios
-          AGENT_DEVICE_SESSION_LOCK: strip
+          ANDROID_REPORT_JSON: ${{ after.qa_android.outputs.report_json || '' }}  # falls back to '' when the Android job published no report
+          IOS_REPORT_JSON: ${{ after.qa_ios.outputs.report_json || '' }}  # falls back to '' when the iOS job published no report
         run: |
-          bash ./scripts/agent-qa/run-and-export.sh "${{ steps.download_build.outputs.artifact_path }}"
+          mkdir -p artifacts/android artifacts/ios artifacts/combined-comment
+          decode_report() {  # decode_report <platform> <raw report_json value> <output path>; returns non-zero when the value is not valid JSON
+            local platform="$1"
+            local value="$2"
+            local output_path="$3"
+
+            REPORT_JSON="${value}" REPORT_OUTPUT_PATH="${output_path}" REPORT_PLATFORM="${platform}" node <<'NODE'
+          const fs = require('node:fs');
+
+          const value = process.env.REPORT_JSON || '';
+          const outputPath = process.env.REPORT_OUTPUT_PATH;
+
+          try {
+            const parsed = JSON.parse(value); // first parse unwraps the string-encoded job output
+            const json = typeof parsed === 'string' ? parsed : JSON.stringify(parsed); // accept either a string-wrapped or a plain JSON value
+            JSON.parse(json); // validate before writing; a failure lands in the catch below
+            fs.writeFileSync(outputPath, json);
+          } catch (error) {
+            console.error('[' + process.env.REPORT_PLATFORM + '] ' + (error instanceof Error ? error.message : String(error))); // name the platform whose report failed
+            process.exit(1);
+          }
+          NODE
+          }
+
+          EXPORT_ARGS=()  # cali export-ci arguments; one flag/path pair is appended per successfully decoded report
+          if [ -n "${ANDROID_REPORT_JSON}" ]; then  # skipped entirely when the Android value is empty
+            if decode_report android "${ANDROID_REPORT_JSON}" artifacts/android/report.json; then
+              EXPORT_ARGS+=(--android artifacts/android/report.json)
+            else
+              echo "Skipping Android report: failed to parse report JSON output."
+            fi
+          fi
+          if [ -n "${IOS_REPORT_JSON}" ]; then  # skipped entirely when the iOS value is empty
+            if decode_report ios "${IOS_REPORT_JSON}" artifacts/ios/report.json; then
+              EXPORT_ARGS+=(--ios artifacts/ios/report.json)
+            else
+              echo "Skipping iOS report: failed to parse report JSON output."
+            fi
+          fi
+          if [ "${#EXPORT_ARGS[@]}" -eq 0 ]; then  # no usable report from either platform: write the placeholder comment
+            printf 'Agent QA comment was not produced.\n' > artifacts/combined-comment/ci-comment.md
+          else
+            cali export-ci "${EXPORT_ARGS[@]}" --output-dir artifacts/combined-comment
+          fi
+          set-output payload "$(cat artifacts/combined-comment/ci-comment.md)"  # publish the composed comment file as this step's payload output
 
   qa_comment:
     name: Comment on PR
-    after: [qa_android, qa_ios]
+    after: [compose_comment]  # waits only on compose_comment, which itself runs after both QA jobs
     if: ${{ always() && github.event_name == 'pull_request' }}
     type: github-comment
     params:
-      payload: |
-        ## Agent QA
-
-        | Platform | Status | Issues |
-        | --- | --- | --- |
-        | Android | ${{ after.qa_android.outputs.status_label || '⛔ blocked' }} | ${{ after.qa_android.outputs.top_issue || 'No Android QA report was produced.' }} |
-        | iOS | ${{ after.qa_ios.outputs.status_label || '⛔ blocked' }} | ${{ after.qa_ios.outputs.top_issue || 'No iOS QA report was produced.' }} |
-
-        ### Screenshots
-
-        | Android | iOS |
-        | --- | --- |
-        | ${{ after.qa_android.outputs.screenshots_cell || 'N/A' }} | ${{ after.qa_ios.outputs.screenshots_cell || 'N/A' }} |
-
-        <details>
-        <summary>Full Android Report</summary>
-
-        ${{ after.qa_android.outputs.section_body || '### Android\n\n**Status:** ⛔ blocked\n\nNo Android QA section was produced.\n' }}
-        </details>
-
-        <details>
-        <summary>Full iOS Report</summary>
-
-        ${{ after.qa_ios.outputs.section_body || '### iOS\n\n**Status:** ⛔ blocked\n\nNo iOS QA section was produced.\n' }}
-        </details>
+      payload: ${{ after.compose_comment.outputs.payload || 'Agent QA comment was not produced.' }}  # fallback repeats the placeholder text the compose step writes when no report is usable
diff --git a/README.md b/README.md
@@ -1,12 +1,12 @@
 # EAS agent-device demo
 
-This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows.
+This repo is a minimal Expo + CNG example for running AI-assisted Android and iOS QA on EAS Workflows with [`cali`](https://github.com/callstackincubator/cali).
 
 ## What it does
 
 - Reuses compatible Android and iOS simulator builds with `fingerprint` + `get-build` + `repack`
 - Falls back to a fresh `build` when the fingerprint changes
-- Runs a small Node.js QA agent built with the AI SDK `ToolLoopAgent`
+- Uses `cali qa` as the mobile QA agent runtime
 - Uses `agent-device` to drive the Android app and iOS simulator, take screenshots, and summarize findings
 - Posts one combined mobile QA summary back to the GitHub pull request with `github-comment`
 - Optionally uploads screenshots to Vercel Blob so the PR comment can link them
@@ -17,7 +17,8 @@ This repo is a minimal Expo + CNG example for running AI-assisted Android and iO
 
 - [eas.json](./eas.json)
 - [.eas/workflows/agent-qa-mobile.yml](./.eas/workflows/agent-qa-mobile.yml)
-- [scripts/agent-qa/index.ts](./scripts/agent-qa/index.ts)
+- [cali.config.json](./cali.config.json)
+- [scripts/agent-qa/run-and-export.sh](./scripts/agent-qa/run-and-export.sh)
 
 ## Required setup
 
@@ -34,38 +35,20 @@ Optional environment variables for the QA job:
 - `QA_MODEL`: Override the default model (`openai/gpt-5.4-mini`)
 - `BLOB_READ_WRITE_TOKEN`: Upload screenshots to Vercel Blob and include public links in the PR comment
 
-## Local smoke test
+The workflow installs the [`agent-device`](https://www.npmjs.com/package/agent-device) skill explicitly in CI with `npx skills add callstackincubator/agent-device --agent codex --skill agent-device -y`, so Cali can discover it from the standard `.agents/skills` location.
 
-```bash
-npm install
-npx tsc --noEmit
-```
+## CI flow
 
-The workflow runner writes `section.md`, `status.txt`, and `report.json` to `artifacts/qa/` during execution. Temporary screenshots are written outside the workspace and uploaded to Vercel Blob when configured.
+The workflow uses `cali qa --ci eas ...` for each platform and then `cali export-ci` to produce:
 
-To execute the runner directly with Node 24, provide the same environment variables the workflow sets:
+- `artifacts/qa/report.json`
+- `artifacts/qa/section.md`
+- `artifacts/qa/status.txt`
+- `artifacts/qa/summary.txt`
+- `artifacts/qa/top-issue.txt`
+- `artifacts/qa/screenshots.md`
+- `artifacts/qa/screenshots.json`
+- `artifacts/qa/ci-comment.md`
+- `artifacts/qa/ci-output.json`
 
-Android:
-
-```bash
-AI_GATEWAY_API_KEY=... \
-QA_PLATFORM=android \
-APP_PATH=/absolute/path/to/app.apk \
-APPLICATION_ID=dev.expo.easagentdevice \
-BUILD_ID=test-build \
-PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \
-node ./scripts/agent-qa/index.ts
-```
-
-iOS simulator:
-
-```bash
-AI_GATEWAY_API_KEY=... \
-QA_PLATFORM=ios \
-APP_PATH=/absolute/path/to/MyApp.app \
-APPLICATION_ID=dev.expo.easagentdevice \
-AGENT_DEVICE_IOS_DEVICE="iPhone 17" \
-BUILD_ID=test-build \
-PR_JSON='{"number":1,"title":"Test PR","body":"Smoke test"}' \
-node ./scripts/agent-qa/index.ts
-```
+The `compose_comment` job then merges the Android and iOS reports into a single comment, which the `qa_comment` job posts to the pull request.
diff --git a/cali.config.json b/cali.config.json
@@ -0,0 +1,13 @@
+{
+  "skillPaths": [
+    "./node_modules/agent-device/skills"
+  ],
+  "commands": {
+    "qa": {
+      "extraInstructions": [
+        "When you need to verify whether text is visible on screen, prefer `snapshot` over `snapshot -i`. Use `snapshot -i` mainly for interactive exploration and choosing refs.",
+        "When you save screenshots, use short descriptive file names and include matching screenshotLabels so downstream PR comments can label them clearly."
+      ]
+    }
+  }
+}