From 0b9e5d70c1b2d8f3e9575aebfd097f5cf0727a6b Mon Sep 17 00:00:00 2001
From: aaight
Date: Tue, 23 Jun 2026 15:14:49 +0200
Subject: [PATCH 1/3] feat(scm): add Webhook Signing Secret field to SCM tab
(#1429)
Co-authored-by: Cascade Bot
---
.../unit/web/scm-webhook-secret-field.test.ts | 103 ++++++++++++++++++
.../projects/integration-scm-tab.tsx | 15 +++
2 files changed, 118 insertions(+)
create mode 100644 tests/unit/web/scm-webhook-secret-field.test.ts
diff --git a/tests/unit/web/scm-webhook-secret-field.test.ts b/tests/unit/web/scm-webhook-secret-field.test.ts
new file mode 100644
index 000000000..3b0c14b30
--- /dev/null
+++ b/tests/unit/web/scm-webhook-secret-field.test.ts
@@ -0,0 +1,103 @@
+/**
+ * Regression guard for the GitHub Webhook Signing Secret field (MNG-1657).
+ *
+ * `GitHubWebhookSection` is a hook-heavy JSX component (uses `useQuery`,
+ * `useMutation`, `useQueryClient`, and `ProjectSecretField` which pulls
+ * React from `web/node_modules`). It cannot be rendered as a plain function
+ * outside a React rendering context, and the unit environment has no jsdom.
+ * This test reads the source directly — the same source-read pattern used by
+ * `combobox.test.ts` and `pm-wizard-styling-guard.test.ts`.
+ *
+ * The backend already supports `GITHUB_WEBHOOK_SECRET` (the `webhook_secret`
+ * role on the GitHub SCM integration drives `verifyGitHubWebhookSignature`).
+ * This story is the UI-only field to set it; these assertions pin that field's
+ * wiring.
+ */
+
+import { readFileSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { describe, expect, it } from 'vitest';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const REPO_ROOT = resolve(__dirname, '..', '..', '..');
+const SCM_TAB_PATH = resolve(REPO_ROOT, 'web/src/components/projects/integration-scm-tab.tsx');
+
+const source = readFileSync(SCM_TAB_PATH, 'utf8');
+
+/** Extracts the slice of the SCM tab source that holds the GitHubWebhookSection component. */
+function webhookSectionRegion(): string {
+ const sectionStart = source.indexOf('function GitHubWebhookSection(');
+ const nextComponentStart = source.indexOf('export function SCMTab');
+ expect(sectionStart, 'GitHubWebhookSection function must exist').toBeGreaterThan(-1);
+ expect(nextComponentStart, 'SCMTab function must exist').toBeGreaterThan(sectionStart);
+ return source.slice(sectionStart, nextComponentStart);
+}
+
+/**
+ * Isolate the `<ProjectSecretField ... />` element bound to the webhook secret.
+ * Splitting on the tag keeps us from accidentally matching the implementer /
+ * reviewer fields in GitHubCredentialSlots (which DO declare onVerify).
+ *
+ * Returns the element's attribute text (everything between the opening tag name
+ * and the self-closing `/>`). Throws when no such field exists in the source.
+ */
+function webhookSecretFieldElement(): string {
+ // Each segment after the first starts right after a `<ProjectSecretField` tag name.
+ const segment = source
+ .split('<ProjectSecretField')
+ .find((s) => s.includes('envVarKey="GITHUB_WEBHOOK_SECRET"'));
+ if (!segment) {
+ throw new Error('a ProjectSecretField bound to GITHUB_WEBHOOK_SECRET must exist');
+ }
+ const closeIdx = segment.indexOf('/>');
+ expect(closeIdx, 'the field element must be self-closing').toBeGreaterThan(-1);
+ return segment.slice(0, closeIdx);
+}
+
+describe('SCM tab — GitHub Webhook Signing Secret field', () => {
+ it('renders a ProjectSecretField bound to envVarKey="GITHUB_WEBHOOK_SECRET" inside GitHubWebhookSection', () => {
+ const region = webhookSectionRegion();
+ expect(region).toContain('<ProjectSecretField');
+ expect(region).toContain('envVarKey="GITHUB_WEBHOOK_SECRET"');
+ });
+
+ it('places the field above the manual curl help and the Create GitHub Webhook button', () => {
+ // Source order stands in for render order: the field must appear earlier in
+ // the file than both the manual-creation help and the create button.
+ const fieldIdx = source.indexOf('envVarKey="GITHUB_WEBHOOK_SECRET"');
+ const curlDetailsIdx = source.indexOf('Manual webhook creation');
+ const createButtonIdx = source.indexOf('Create GitHub Webhook');
+ expect(fieldIdx).toBeGreaterThan(-1);
+ expect(curlDetailsIdx).toBeGreaterThan(-1);
+ expect(createButtonIdx).toBeGreaterThan(-1);
+ expect(fieldIdx, 'field must render above the curl <details>').toBeLessThan(curlDetailsIdx);
+ expect(fieldIdx, 'field must render above the Create button').toBeLessThan(createButtonIdx);
+ });
+
+ it('queries existing credentials via trpc.projects.credentials.list and passes the matching credential', () => {
+ const region = webhookSectionRegion();
+ expect(region).toContain('trpc.projects.credentials.list.queryOptions({ projectId })');
+ expect(region).toContain("c.envVarKey === 'GITHUB_WEBHOOK_SECRET'");
+ // The derived credential is forwarded so the configured badge + masked
+ // last-4 render when the secret is set.
+ expect(webhookSecretFieldElement()).toContain('credential={webhookSecretCred}');
+ });
+
+ it('uses the label "Webhook Signing Secret (optional)"', () => {
+ expect(webhookSecretFieldElement()).toContain('label="Webhook Signing Secret (optional)"');
+ });
+
+ it('describes HMAC-SHA256 verification, skip-when-blank, and side-symmetry', () => {
+ const field = webhookSecretFieldElement();
+ expect(field).toMatch(/description="[^"]*HMAC-SHA256[^"]*"/);
+ expect(field, 'must explain verification is skipped when blank').toMatch(
+ /description="[^"]*skipped[^"]*"/,
+ );
+ expect(field, 'must explain the same value goes on the GitHub side').toMatch(
+ /description="[^"]*same value[^"]*GitHub[^"]*"/,
+ );
+ });
+
+ it('does NOT render a verify button (onVerify omitted)', () => {
+ // The signing secret has no remote identity to resolve. Omitting onVerify
+ // means ProjectSecretField renders only Save/Clear (no Verify button),
+ // while still self-managing persistence + credentials.list invalidation.
+ expect(webhookSecretFieldElement()).not.toContain('onVerify');
+ });
+});
diff --git a/web/src/components/projects/integration-scm-tab.tsx b/web/src/components/projects/integration-scm-tab.tsx
index a09895c50..d92f36c47 100644
--- a/web/src/components/projects/integration-scm-tab.tsx
+++ b/web/src/components/projects/integration-scm-tab.tsx
@@ -102,6 +102,11 @@ function GitHubCredentialSlots({ projectId }: { projectId: string }) {
function GitHubWebhookSection({ projectId }: { projectId: string }) {
const queryClient = useQueryClient();
+ const credentialsQuery = useQuery(trpc.projects.credentials.list.queryOptions({ projectId }));
+ const webhookSecretCred = (credentialsQuery.data ?? []).find(
+ (c) => c.envVarKey === 'GITHUB_WEBHOOK_SECRET',
+ );
+
const callbackBaseUrl =
API_URL ||
(typeof window !== 'undefined' ? window.location.origin.replace(':5173', ':3000') : '');
@@ -169,6 +174,16 @@ function GitHubWebhookSection({ projectId }: { projectId: string }) {
+ {/* Webhook signing secret (optional HMAC verification) */}
+
+
{/* GitHub-specific error */}
{webhooksQuery.data?.errors?.github && (
From 183cb3c7ec7a5695a5f623b428abcd4be467b4e0 Mon Sep 17 00:00:00 2001
From: aaight
Date: Tue, 23 Jun 2026 15:38:28 +0200
Subject: [PATCH 2/3] feat(scm): include signing secret placeholder in GitHub
webhook curl help (#1431)
Add "secret": "<YOUR_WEBHOOK_SECRET>" (placeholder only, never the real
value) to the manual webhook curl config object so operators wiring a
GitHub webhook by hand can discover and configure HMAC signature
verification.
- Extract the curl construction into an exported pure helper
buildGithubWebhookCurl(webhookCallbackUrl) so it is unit-testable
without React Query / tRPC providers; GitHubWebhookSection consumes it.
- Add explanatory copy under the curl noting the placeholder must match
the saved Webhook Signing Secret value and that the create-webhook
button injects it automatically once saved (CLI equivalent included).
- New tests/unit/web/scm-github-webhook.test.ts pins the secret
placeholder, callback URL interpolation, and event list.
- Document GitHub's optional GITHUB_WEBHOOK_SECRET in getting-started.md,
matching the Linear/Sentry rows, with a one-line opt-in note.
Co-authored-by: Cascade Bot
Co-authored-by: Claude Opus 4.8
---
docs/getting-started.md | 4 +-
tests/unit/web/scm-github-webhook.test.ts | 72 +++++++++++++++++++
.../projects/integration-scm-tab.tsx | 59 +++++++++++----
3 files changed, 120 insertions(+), 15 deletions(-)
create mode 100644 tests/unit/web/scm-github-webhook.test.ts
diff --git a/docs/getting-started.md b/docs/getting-started.md
index df496e662..a5b2df7f8 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -325,12 +325,14 @@ This creates webhooks on GitHub, Trello, and Jira when those integrations are co
| Provider | Setup behavior | Callback URL |
|----------|----------------|--------------|
-| GitHub | Programmatic create/list/delete | `https://your-router-host/github/webhook` |
+| GitHub | Programmatic create/list/delete with optional `GITHUB_WEBHOOK_SECRET` for HMAC-SHA256 signature verification | `https://your-router-host/github/webhook` |
| Trello | Programmatic create/list/delete | `https://your-router-host/trello/webhook` |
| Jira | Programmatic create/list/delete plus label ensure | `https://your-router-host/jira/webhook` |
| Linear | Manual setup with optional `LINEAR_WEBHOOK_SECRET` | `https://your-router-host/linear/webhook` |
| Sentry | Manual setup with optional Sentry webhook secret; paired with configured `organizationSlug`/`projectSlug` and filtered by payload project matching `projectSlug` | `https://your-router-host/sentry/webhook/my-project` |
+GitHub signature verification is opt-in: set `GITHUB_WEBHOOK_SECRET` (via the dashboard's **Webhook Signing Secret** field or `cascade projects credentials-set <id> --key GITHUB_WEBHOOK_SECRET --value <secret>`) and Cascade verifies HMAC-SHA256 on every delivery; leave it unset to skip verification.
+
---
## 10. Configure Triggers
diff --git a/tests/unit/web/scm-github-webhook.test.ts b/tests/unit/web/scm-github-webhook.test.ts
new file mode 100644
index 000000000..d4547b7b2
--- /dev/null
+++ b/tests/unit/web/scm-github-webhook.test.ts
@@ -0,0 +1,72 @@
+/**
+ * Tests for `buildGithubWebhookCurl` (MNG-1658).
+ *
+ * The manual webhook `curl` command was hoisted out of `GitHubWebhookSection`
+ * into an exported pure helper so it can be unit-tested without React Query /
+ * tRPC providers. These assertions pin the curl payload shape: the signing
+ * secret placeholder, the callback URL interpolation, and the full event list.
+ *
+ * Follows the string-assertion conventions in `linear-webhook-step.test.ts`.
+ */
+
+import { describe, expect, it } from 'vitest';
+import { buildGithubWebhookCurl } from '../../../web/src/components/projects/integration-scm-tab.js';
+
+const CALLBACK_URL = 'https://router.example.com/github/webhook';
+
+describe('buildGithubWebhookCurl', () => {
+ it('includes a "secret" field in the config object', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ expect(curl).toContain('"secret"');
+ });
+
+ it('uses the placeholder, never a real secret value', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ expect(curl).toContain('"secret": "<YOUR_WEBHOOK_SECRET>"');
+ });
+
+ it('interpolates the provided callback URL into the config.url field', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ expect(curl).toContain(`"url": "${CALLBACK_URL}"`);
+ });
+
+ it('renders the full GitHub event list', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ for (const event of [
+ 'push',
+ 'pull_request',
+ 'pull_request_review',
+ 'pull_request_review_comment',
+ 'check_suite',
+ 'issue_comment',
+ ]) {
+ expect(curl).toContain(event);
+ }
+ });
+
+ it('targets the GitHub repo hooks API with content_type json', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ expect(curl).toContain('curl -X POST "https://api.github.com/repos///hooks"');
+ expect(curl).toContain('"content_type": "json"');
+ });
+
+ it('produces a config object whose JSON body parses with url, content_type and secret', () => {
+ const curl = buildGithubWebhookCurl(CALLBACK_URL);
+ // Extract the JSON payload passed to `-d '{ ... }'` and parse it to prove
+ // adding the secret field kept the body valid JSON (no trailing-comma break).
+ const jsonStart = curl.indexOf('{');
+ const jsonEnd = curl.lastIndexOf('}');
+ const payload = JSON.parse(curl.slice(jsonStart, jsonEnd + 1));
+ expect(payload.config).toMatchObject({
+ url: CALLBACK_URL,
+ content_type: 'json',
+ secret: '<YOUR_WEBHOOK_SECRET>',
+ });
+ expect(payload.events).toContain('check_suite');
+ });
+
+ it('reflects different callback URLs (pure function, no hidden state)', () => {
+ const other = 'https://other.example.com/github/webhook';
+ expect(buildGithubWebhookCurl(other)).toContain(`"url": "${other}"`);
+ });
+});
diff --git a/web/src/components/projects/integration-scm-tab.tsx b/web/src/components/projects/integration-scm-tab.tsx
index d92f36c47..ef9a9d4e4 100644
--- a/web/src/components/projects/integration-scm-tab.tsx
+++ b/web/src/components/projects/integration-scm-tab.tsx
@@ -99,6 +99,37 @@ function GitHubCredentialSlots({ projectId }: { projectId: string }) {
// GitHub Webhook Management
// ============================================================================
+/**
+ * Build the manual `curl` command an operator can run to register the GitHub
+ * webhook by hand.
+ *
+ * Extracted as a pure function (no React Query / tRPC providers required) so it
+ * can be unit-tested in isolation — see `tests/unit/web/scm-github-webhook.test.ts`.
+ *
+ * The `secret` field is always the literal placeholder `<YOUR_WEBHOOK_SECRET>`;
+ * plaintext secrets are NEVER interpolated here or returned to the browser. The
+ * operator substitutes the real value (the same one saved in the Webhook Signing
+ * Secret field) before running the command, or uses the dashboard's create-webhook
+ * button, which injects it server-side from the stored credential.
+ */
+export function buildGithubWebhookCurl(webhookCallbackUrl: string): string {
+ return [
+ 'curl -X POST "https://api.github.com/repos///hooks" \\',
+ ' -H "Authorization: Bearer " \\',
+ ' -H "Content-Type: application/json" \\',
+ " -d '{",
+ ' "name": "web",',
+ ' "active": true,',
+ ' "events": ["push", "pull_request", "pull_request_review", "pull_request_review_comment", "check_suite", "issue_comment"],',
+ ' "config": {',
+ ` "url": "${webhookCallbackUrl}",`,
+ ' "content_type": "json",',
+ ' "secret": "<YOUR_WEBHOOK_SECRET>"',
+ ' }',
+ " }'",
+ ].join('\n');
+}
+
function GitHubWebhookSection({ projectId }: { projectId: string }) {
const queryClient = useQueryClient();
@@ -150,20 +181,7 @@ function GitHubWebhookSection({ projectId }: { projectId: string }) {
const webhookCallbackUrl = callbackBaseUrl
? `${callbackBaseUrl}/github/webhook`
: '/github/webhook';
- const githubCurlCommand = [
- 'curl -X POST "https://api.github.com/repos///hooks" \\',
- ' -H "Authorization: Bearer " \\',
- ' -H "Content-Type: application/json" \\',
- " -d '{",
- ' "name": "web",',
- ' "active": true,',
- ' "events": ["push", "pull_request", "pull_request_review", "pull_request_review_comment", "check_suite", "issue_comment"],',
- ' "config": {',
- ` "url": "${webhookCallbackUrl}",`,
- ' "content_type": "json"',
- ' }',
- " }'",
- ].join('\n');
+ const githubCurlCommand = buildGithubWebhookCurl(webhookCallbackUrl);
return (
@@ -264,6 +282,19 @@ function GitHubWebhookSection({ projectId }: { projectId: string }) {
{githubCurlCommand}
+
+ Replace <YOUR_WEBHOOK_SECRET> with the same value you saved in the{' '}
+ Webhook Signing Secret field above — it enables HMAC-SHA256 signature
+ verification on every delivery. The Create GitHub Webhook button
+ injects this secret automatically once it's saved (the server resolves it from your
+ stored credentials), so manual substitution is only needed for this curl fallback. CLI
+ equivalent:{' '}
+
+ cascade projects credentials-set <id> --key GITHUB_WEBHOOK_SECRET --value
+ <secret>
+
+ .
+
From 94d599884c5c745db55f7cf05e7b50cbcd91a182 Mon Sep 17 00:00:00 2001
From: Zbigniew Sobiecki
Date: Tue, 23 Jun 2026 17:18:01 +0200
Subject: [PATCH 3/3] fix(codex): stream per-item LLM-call rows with full tool
detail
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Codex collapsed an entire run into one agent_run_llm_calls row written at
turn.completed, with tools stored as bare name strings (input dropped) — so the
dashboard showed one end-of-run row of empty "bash" badges: not realtime, no
command detail. Codex reports token usage only once (cumulative) at
turn.completed, so per-row token attribution is not possible.
Persist a Claude-Code-style content-block row per item.completed as it streams
(text, or tool_use with full input; tool names normalized to the Claude vocab so
the shared parser renders the command/args). Keep the single cumulative
cost/usage row at turn.completed unchanged, so run-total cost stays accurate.
New codex rows are content-block arrays and render via the existing
parseClaudeCodeBlocks path; parseCodexPayload stays as a fallback for old rows.
Also poll the run-detail LLM-calls list (and run status) while the run is active
so the streamed rows appear live.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
src/backends/codex/index.ts | 112 ++++++++++++++----
tests/unit/backends/codex.test.ts | 95 +++++++++++----
tests/unit/utils/llmResponseParser.test.ts | 23 ++++
.../components/llm-calls/llm-call-list.tsx | 10 +-
web/src/routes/runs/$runId.tsx | 10 +-
5 files changed, 204 insertions(+), 46 deletions(-)
diff --git a/src/backends/codex/index.ts b/src/backends/codex/index.ts
index f5ff61770..bce968c14 100644
--- a/src/backends/codex/index.ts
+++ b/src/backends/codex/index.ts
@@ -50,7 +50,6 @@ type JsonRecord = Record;
*/
type CodexTurnAccumulator = {
textSummary: string[];
- toolNames: string[];
usage: UsageSummary | null;
};
@@ -213,10 +212,11 @@ function persistTurnLlmCall(context: CodexLineContext): void {
}
}
+ // Tools/text detail now stream as their own per-item rows (persistItemRow);
+ // the turn.completed row carries the turn's cost/usage + a short text summary.
const turnPayload = JSON.stringify({
turn: context.llmCallCount,
text: acc.textSummary.join(' ').slice(0, 500) || undefined,
- tools: acc.toolNames.length > 0 ? acc.toolNames : undefined,
usage: usage ?? undefined,
delta: delta ?? undefined,
// Reasoning breakdown preserved for observability; it is already counted
@@ -237,7 +237,53 @@ function persistTurnLlmCall(context: CodexLineContext): void {
});
// Reset the accumulator for the next turn
- context.currentTurn = { textSummary: [], toolNames: [], usage: null };
+ context.currentTurn = { textSummary: [], usage: null };
+}
+
+/**
+ * Map a Codex tool name/input onto the Claude-Code tool vocabulary so the shared
+ * `summarizeInput` / `getToolStyle` render the argument and colour. Codex's
+ * command_execution surfaces as lowercase `bash`; function_call names vary.
+ *
+ * Matching is case-insensitive. Unrecognized names pass through unchanged, and
+ * `input` is always returned as received.
+ */
+function normalizeCodexTool(
+ name: string,
+ input?: Record<string, unknown>,
+): { name: string; input?: Record<string, unknown> } {
+ switch (name.toLowerCase()) {
+ case 'bash':
+ case 'shell':
+ return { name: 'Bash', input };
+ case 'read_file':
+ case 'read':
+ return { name: 'Read', input };
+ case 'write_file':
+ case 'write':
+ return { name: 'Write', input };
+ case 'apply_patch':
+ case 'edit_file':
+ case 'edit':
+ return { name: 'Edit', input };
+ default:
+ return { name, input };
+ }
+}
+
+/**
+ * Persist one realtime detail row for a completed Codex item (a text message or a
+ * tool call), stored as a Claude-Code-style content-block array so the shared
+ * response parser renders it identically (tool command/args shown). These rows
+ * carry NO tokens — Codex reports usage only once (cumulative) on turn.completed,
+ * which persistTurnLlmCall records as the single cost-bearing row.
+ *
+ * Each call advances `context.llmCallCount`, so item rows and the turn's cost
+ * row share one call-number sequence.
+ */
+function persistItemRow(context: CodexLineContext, block: Record<string, unknown>): void {
+ context.llmCallCount += 1;
+ logLlmCall({
+ runId: context.input.runId,
+ callNumber: context.llmCallCount,
+ model: context.model,
+ response: JSON.stringify([block]),
+ engineLabel: 'Codex',
+ });
}
/**
@@ -263,7 +309,7 @@ async function handleStructuralEvent(
}
if (eventType === 'turn.started' || eventType === 'thread.started') {
// Reset turn accumulator at the start of each new turn
- context.currentTurn = { textSummary: [], toolNames: [], usage: null };
+ context.currentTurn = { textSummary: [], usage: null };
return true;
}
if (eventType === 'item.started') {
@@ -275,32 +321,58 @@ async function handleStructuralEvent(
return false;
}
+/**
+ * Log every text part and append a truncated copy (first 200 chars) to the
+ * current turn's summary. A realtime text row — a single content block holding
+ * the parts joined together — is persisted only when a model ITEM completes,
+ * never for streaming deltas.
+ */
+function handleCodexText(
+ context: CodexLineContext,
+ textParts: string[],
+ isItemCompleted: boolean,
+): void {
+ textParts.forEach((part) => {
+ logText(context, part);
+ context.currentTurn.textSummary.push(part.slice(0, 200));
+ });
+ if (!isItemCompleted || textParts.length === 0) return;
+ persistItemRow(context, { type: 'text', text: textParts.join('') });
+}
+
+/**
+ * Report a tool call to progress, persisting a realtime tool row (with full
+ * input, normalized to the Claude-Code vocab) only when the item completes.
+ *
+ * The debug log and progress reporter receive the tool name as Codex emitted
+ * it; only the persisted row uses the normalized name.
+ */
+function handleCodexToolCall(
+ context: CodexLineContext,
+ toolCall: { name: string; input?: Record<string, unknown> },
+ isItemCompleted: boolean,
+): void {
+ context.input.logWriter('DEBUG', 'Codex tool call', {
+ name: toolCall.name,
+ input: toolCall.input,
+ });
+ context.input.progressReporter.onToolCall(toolCall.name, toolCall.input);
+ if (isItemCompleted) {
+ const normalized = normalizeCodexTool(toolCall.name, toolCall.input);
+ persistItemRow(context, { type: 'tool_use', name: normalized.name, input: normalized.input });
+ }
+}
+
async function handleParsedLine(context: CodexLineContext, parsed: JsonRecord): Promise {
const eventType = typeof parsed.type === 'string' ? parsed.type : '';
if (await handleStructuralEvent(context, parsed, eventType)) return;
const { textParts, toolCall, usage, error } = parseCodexEvent(parsed);
+ const isItemCompleted = eventType === 'item.completed';
if (textParts.length > 0 || toolCall) {
await trackIteration(context);
}
- for (const text of textParts) {
- logText(context, text);
- // Accumulate text into the turn buffer for compact per-call payload
- context.currentTurn.textSummary.push(text.slice(0, 200));
- }
-
- if (toolCall) {
- context.input.logWriter('DEBUG', 'Codex tool call', {
- name: toolCall.name,
- input: toolCall.input,
- });
- context.input.progressReporter.onToolCall(toolCall.name, toolCall.input);
- // Track tool name in turn buffer for the compact payload
- context.currentTurn.toolNames.push(toolCall.name);
- }
+ handleCodexText(context, textParts, isItemCompleted);
+ if (toolCall) handleCodexToolCall(context, toolCall, isItemCompleted);
if (usage) {
context.input.logWriter('DEBUG', 'Codex usage', { usage });
@@ -719,7 +791,7 @@ export class CodexEngine extends NativeToolEngine {
llmCallCount,
cost,
finalError,
- currentTurn: { textSummary: [], toolNames: [], usage: null },
+ currentTurn: { textSummary: [], usage: null },
cumulativeUsage: {
inputTokens: 0,
outputTokens: 0,
diff --git a/tests/unit/backends/codex.test.ts b/tests/unit/backends/codex.test.ts
index 2fe30bebc..3080bbee9 100644
--- a/tests/unit/backends/codex.test.ts
+++ b/tests/unit/backends/codex.test.ts
@@ -959,8 +959,9 @@ describe('CodexEngine', () => {
expect(input.progressReporter.onToolCall).toHaveBeenCalledWith('bash', {
command: 'cascade-tools session finish --comment done',
});
- // Exactly ONE storeLlmCall row per completed turn
- expect(mockStoreLlmCall).toHaveBeenCalledTimes(1);
+ // Two realtime per-item rows (text + tool) + one turn.completed cost row.
+ expect(mockStoreLlmCall).toHaveBeenCalledTimes(3);
+ // The cost row carries the turn usage.
expect(mockStoreLlmCall).toHaveBeenCalledWith(
expect.objectContaining({ inputTokens: 100, outputTokens: 50 }),
);
@@ -1041,17 +1042,26 @@ describe('CodexEngine', () => {
const result = await engine.execute(input);
expect(result.success).toBe(true);
- // Exactly two rows — one per completed turn
- expect(mockStoreLlmCall).toHaveBeenCalledTimes(2);
- // Codex emits CUMULATIVE session usage; rows must store per-turn DELTAS.
+ // Two realtime text rows (one per agent_message) interleaved with two
+ // turn.completed cost rows = 4 rows total.
+ expect(mockStoreLlmCall).toHaveBeenCalledTimes(4);
+ // Row 1 = 'First.' text row — a content-block array, no tokens.
+ const firstTextRow = mockStoreLlmCall.mock.calls[0][0] as {
+ response: string;
+ inputTokens?: number;
+ };
+ expect(firstTextRow.inputTokens).toBeUndefined();
+ expect(JSON.parse(firstTextRow.response)).toEqual([{ type: 'text', text: 'First.' }]);
+ // Codex emits CUMULATIVE session usage; the cost rows store per-turn DELTAS.
// Feeding cumulative {50,20} then {80,30} → deltas {50,20} and {30,10}.
+ // Row 2 = turn-1 cost row; row 4 = turn-2 cost row.
expect(mockStoreLlmCall).toHaveBeenNthCalledWith(
- 1,
- expect.objectContaining({ callNumber: 1, inputTokens: 50, outputTokens: 20 }),
+ 2,
+ expect.objectContaining({ callNumber: 2, inputTokens: 50, outputTokens: 20 }),
);
expect(mockStoreLlmCall).toHaveBeenNthCalledWith(
- 2,
- expect.objectContaining({ callNumber: 2, inputTokens: 30, outputTokens: 10 }),
+ 4,
+ expect.objectContaining({ callNumber: 4, inputTokens: 30, outputTokens: 10 }),
);
});
@@ -1088,7 +1098,7 @@ describe('CodexEngine', () => {
);
});
- it('stores a compact turn-scoped payload with text summary and tool names', async () => {
+ it('streams per-item rows (text + tool with input) and a compact turn cost row', async () => {
mockSpawn.mockImplementation((_cmd: string, args: string[]) => {
const outputPath = args[args.indexOf('-o') + 1];
return createMockChild({
@@ -1115,18 +1125,59 @@ describe('CodexEngine', () => {
const input = makeInput({ repoDir: workspaceDir, runId: 'run-payload-shape' });
await engine.execute(input);
- expect(mockStoreLlmCall).toHaveBeenCalledTimes(1);
- const [{ response }] = mockStoreLlmCall.mock.calls[0] as [{ response: string }][];
- const payload = JSON.parse(response) as Record;
- // Payload must be a compact object, NOT a raw JSONL line dump
- expect(payload).toMatchObject({
- turn: 1,
- tools: ['bash'],
- usage: { inputTokens: 30, outputTokens: 10 },
- });
- expect(typeof payload.text).toBe('string');
- // Payload must be reasonably sized (< 2 KB) — not a multi-KB raw event dump
- expect(response.length).toBeLessThan(2000);
+ // 1 text row + 1 tool row + 1 turn.completed cost row.
+ expect(mockStoreLlmCall).toHaveBeenCalledTimes(3);
+ const calls = mockStoreLlmCall.mock.calls as Array<
+ [{ response: string; inputTokens?: number }]
+ >;
+ // Row 1: the agent message as a content-block array (renders via the shared parser).
+ expect(JSON.parse(calls[0][0].response)).toEqual([
+ { type: 'text', text: 'I will run a command.' },
+ ]);
+ // Row 2: the tool call keeps its full input, normalized to the Claude tool vocab.
+ expect(JSON.parse(calls[1][0].response)).toEqual([
+ { type: 'tool_use', name: 'Bash', input: { command: 'ls' } },
+ ]);
+ expect(calls[1][0].inputTokens).toBeUndefined();
+ // Row 3: the compact turn cost row — carries usage/delta, no tool-name dump.
+ const costPayload = JSON.parse(calls[2][0].response) as Record;
+ expect(costPayload).toMatchObject({ turn: 3, usage: { inputTokens: 30, outputTokens: 10 } });
+ expect(costPayload.tools).toBeUndefined();
+ expect(calls[2][0].response.length).toBeLessThan(2000);
+ });
+
+ it('normalizes function_call names and persists only on item.completed (not deltas)', async () => {
+ mockSpawn.mockImplementation((_cmd: string, args: string[]) => {
+ const outputPath = args[args.indexOf('-o') + 1];
+ return createMockChild({
+ stdoutLines: [
+ JSON.stringify({ type: 'turn.started' }),
+ // A streaming text delta must NOT persist a row (only completed items do).
+ JSON.stringify({ type: 'item.delta', delta: { type: 'text_delta', text: 'thinking…' } }),
+ // A completed function_call read_file → normalized to Read, input preserved.
+ JSON.stringify({
+ type: 'item.completed',
+ item: {
+ type: 'function_call',
+ name: 'read_file',
+ arguments: '{"file_path":"src/a.ts"}',
+ },
+ }),
+ JSON.stringify({ type: 'turn.completed', usage: { input_tokens: 5, output_tokens: 2 } }),
+ ],
+ onBeforeClose: () => writeFileSync(outputPath, 'done', 'utf-8'),
+ });
+ });
+
+ const engine = new CodexEngine();
+ await engine.execute(makeInput({ repoDir: workspaceDir, runId: 'run-normalize' }));
+
+ // The delta did not persist; one tool row + one cost row = 2.
+ expect(mockStoreLlmCall).toHaveBeenCalledTimes(2);
+ const toolResponse = (mockStoreLlmCall.mock.calls[0][0] as { response: string }).response;
+ expect(JSON.parse(toolResponse)).toEqual([
+ { type: 'tool_use', name: 'Read', input: { file_path: 'src/a.ts' } },
+ ]);
});
it('does not call storeLlmCall when no turn.completed event fires (no response events only)', async () => {
diff --git a/tests/unit/utils/llmResponseParser.test.ts b/tests/unit/utils/llmResponseParser.test.ts
index 2cf05db89..1e4cfec3a 100644
--- a/tests/unit/utils/llmResponseParser.test.ts
+++ b/tests/unit/utils/llmResponseParser.test.ts
@@ -152,6 +152,29 @@ describe.concurrent('parseLlmResponse', () => {
});
});
+ describe('Codex realtime per-item rows (content-block array)', () => {
+ // Codex now streams one content-block-array row per item.completed, so its
+ // tool calls carry full input and render via the shared Claude-Code path —
+ // no more empty-badge inputSummary.
+ it('renders a codex tool row with the command (normalized to Bash)', () => {
+ const response = JSON.stringify([
+ { type: 'tool_use', name: 'Bash', input: { command: 'git status' } },
+ ]);
+ const result = parseLlmResponse(response);
+ expect(result.blocks).toEqual([
+ { kind: 'tool_use', name: 'Bash', inputSummary: 'git status' },
+ ]);
+ expect(result.toolNames).toEqual(['Bash']);
+ });
+
+ it('renders a codex text row', () => {
+ const response = JSON.stringify([{ type: 'text', text: 'Reviewing the PR.' }]);
+ const result = parseLlmResponse(response);
+ expect(result.blocks).toEqual([{ kind: 'text', text: 'Reviewing the PR.' }]);
+ expect(result.textPreview).toBe('Reviewing the PR.');
+ });
+ });
+
describe('LLMist format (gadget markup)', () => {
const gadget = (name: string, args: Record) => {
const argLines = Object.entries(args)
diff --git a/web/src/components/llm-calls/llm-call-list.tsx b/web/src/components/llm-calls/llm-call-list.tsx
index 9b1dd773e..092db2d32 100644
--- a/web/src/components/llm-calls/llm-call-list.tsx
+++ b/web/src/components/llm-calls/llm-call-list.tsx
@@ -8,6 +8,8 @@ import { LlmCallDetail } from './llm-call-detail.js';
interface LlmCallListProps {
runId: string;
+ /** When the run is still active, poll so newly-persisted calls stream in live. */
+ isRunning?: boolean;
}
type ToolCall = { name: string; inputSummary: string };
@@ -133,10 +135,14 @@ function CallRow({ runId, call, delta, isExpanded, onToggle }: CallRowProps) {
);
}
-export function LlmCallList({ runId }: LlmCallListProps) {
+export function LlmCallList({ runId, isRunning }: LlmCallListProps) {
const [expandedCall, setExpandedCall] = useState(null);
- const callsQuery = useQuery(trpc.runs.listLlmCalls.queryOptions({ runId }));
+ const callsQuery = useQuery({
+ ...trpc.runs.listLlmCalls.queryOptions({ runId }),
+ // While the run is active, poll so per-item rows appear in realtime.
+ refetchInterval: isRunning ? 3000 : false,
+ });
if (callsQuery.isLoading) {
return Loading LLM calls...
;
diff --git a/web/src/routes/runs/$runId.tsx b/web/src/routes/runs/$runId.tsx
index b3790aaac..631b7eaf9 100644
--- a/web/src/routes/runs/$runId.tsx
+++ b/web/src/routes/runs/$runId.tsx
@@ -19,7 +19,11 @@ function RunDetailPage() {
const { runId } = runDetailRoute.useParams();
const [activeTab, setActiveTab] = useState('overview');
- const runQuery = useQuery(trpc.runs.getById.queryOptions({ id: runId }));
+ const runQuery = useQuery({
+ ...trpc.runs.getById.queryOptions({ id: runId }),
+ // Poll while the run is active so status + the live-updating tabs refresh.
+ refetchInterval: (query) => (query.state.data?.status === 'running' ? 5000 : false),
+ });
if (runQuery.isLoading) {
return Loading run...
;
@@ -96,7 +100,9 @@ function RunDetailPage() {
{activeTab === 'overview' && }
{activeTab === 'logs' && }
- {activeTab === 'llm-calls' && }
+ {activeTab === 'llm-calls' && (
+
+ )}
{activeTab === 'debug' && }
);