recodeee · NagyVikt · May 15, 2026 · May 15, 2026
diff --git a/apps/mcp-server/src/tools/savings.ts b/apps/mcp-server/src/tools/savings.ts
@@ -4,12 +4,29 @@ import {
   savingsLiveComparisonCost,
   savingsReferenceTotals,
 } from '@colony/core';
+import type { McpMetricsAggregateRow } from '@colony/storage';
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { z } from 'zod';
 import { type ToolContext, defaultWrapHandler } from './context.js';
 
 const DEFAULT_WINDOW_HOURS = 24;
 const HOUR_MS = 60 * 60_000;
+const ERROR_BREAKDOWN_OPERATION_LIMIT = 8;
+
+interface SavingsErrorBreakdownOperation {
+  operation: string;
+  count: number;
+  last_ts: number | null;
+  error_message: string | null;
+}
+
+interface SavingsErrorBreakdownRow {
+  error_code: string | null;
+  count: number;
+  last_ts: number | null;
+  operations: SavingsErrorBreakdownOperation[];
+  operations_truncated: boolean;
+}
 
 /**
  * Reports live per-operation token usage recorded by the metrics wrapper plus
@@ -108,6 +125,7 @@ export function register(server: McpServer, ctx: ToolContext): void {
           ...(operation !== undefined ? { operation } : {}),
           cost_basis: live.cost_basis,
           totals: live.totals,
+          error_breakdown: buildErrorBreakdown(live.operations),
           operations: live.operations,
           session_summary: live.session_summary,
           sessions: live.sessions,
@@ -159,3 +177,73 @@ function parseCostRate(
   const parsed = Number(fallback);
   return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
 }
+
+function buildErrorBreakdown(
+  rows: ReadonlyArray<McpMetricsAggregateRow>,
+): SavingsErrorBreakdownRow[] {
+  const byCode = new Map<
+    string,
+    {
+      error_code: string | null;
+      count: number;
+      last_ts: number | null;
+      operations: Map<string, SavingsErrorBreakdownOperation>;
+    }
+  >();
+
+  for (const row of rows) {
+    for (const reason of row.error_reasons) {
+      if (reason.count <= 0) continue;
+      const key = reason.error_code ?? '<null>';
+      let bucket = byCode.get(key);
+      if (bucket === undefined) {
+        bucket = {
+          error_code: reason.error_code,
+          count: 0,
+          last_ts: null,
+          operations: new Map(),
+        };
+        byCode.set(key, bucket);
+      }
+      bucket.count += reason.count;
+      bucket.last_ts = maxTs(bucket.last_ts, reason.last_ts);
+
+      const operation = bucket.operations.get(row.operation) ?? {
+        operation: row.operation,
+        count: 0,
+        last_ts: null,
+        error_message: null,
+      };
+      operation.count += reason.count;
+      if ((reason.last_ts ?? 0) >= (operation.last_ts ?? 0)) {
+        operation.last_ts = reason.last_ts;
+        operation.error_message = reason.error_message;
+      }
+      bucket.operations.set(row.operation, operation);
+    }
+  }
+
+  return [...byCode.values()]
+    .sort((a, b) => b.count - a.count || (b.last_ts ?? 0) - (a.last_ts ?? 0))
+    .map((bucket) => {
+      const operations = [...bucket.operations.values()].sort(
+        (a, b) =>
+          b.count - a.count ||
+          (b.last_ts ?? 0) - (a.last_ts ?? 0) ||
+          a.operation.localeCompare(b.operation),
+      );
+      return {
+        error_code: bucket.error_code,
+        count: bucket.count,
+        last_ts: bucket.last_ts,
+        operations: operations.slice(0, ERROR_BREAKDOWN_OPERATION_LIMIT),
+        operations_truncated: operations.length > ERROR_BREAKDOWN_OPERATION_LIMIT,
+      };
+    });
+}
+
+function maxTs(a: number | null, b: number | null): number | null {
+  if (a === null) return b;
+  if (b === null) return a;
+  return Math.max(a, b);
+}
diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts
@@ -331,6 +331,94 @@ describe('MCP server', () => {
     expect(payload.reference).toBeUndefined();
   });
 
+  it('savings_report returns an additive error_breakdown grouped by error code', async () => {
+    const now = Date.now();
+    store.storage.recordMcpMetric({
+      ts: now - 30_000,
+      operation: 'task_post',
+      input_bytes: 100,
+      output_bytes: 200,
+      input_tokens: 10,
+      output_tokens: 20,
+      duration_ms: 12,
+      ok: false,
+      error_code: 'TASK_NOT_FOUND',
+      error_message: 'task 999 not found',
+    });
+    store.storage.recordMcpMetric({
+      ts: now - 20_000,
+      operation: 'task_claim_file',
+      input_bytes: 100,
+      output_bytes: 200,
+      input_tokens: 15,
+      output_tokens: 25,
+      duration_ms: 10,
+      ok: false,
+      error_code: 'TASK_NOT_FOUND',
+      error_message: 'task 1000 not found',
+    });
+    store.storage.recordMcpMetric({
+      ts: now - 10_000,
+      operation: 'task_claim_file',
+      input_bytes: 100,
+      output_bytes: 200,
+      input_tokens: 5,
+      output_tokens: 10,
+      duration_ms: 9,
+      ok: false,
+      error_code: 'SCOUT_NO_CLAIM',
+      error_message: 'scout cannot claim files',
+    });
+
+    const res = await client.callTool({
+      name: 'savings_report',
+      arguments: { hours: 1 },
+    });
+    const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}';
+    const payload = JSON.parse(text) as {
+      live: {
+        error_breakdown: Array<{
+          error_code: string | null;
+          count: number;
+          last_ts: number | null;
+          operations_truncated: boolean;
+          operations: Array<{
+            operation: string;
+            count: number;
+            last_ts: number | null;
+            error_message: string | null;
+          }>;
+        }>;
+      };
+    };
+
+    expect(payload.live.error_breakdown).toHaveLength(2);
+    expect(payload.live.error_breakdown[0]).toMatchObject({
+      error_code: 'TASK_NOT_FOUND',
+      count: 2,
+      last_ts: now - 20_000,
+      operations_truncated: false,
+    });
+    expect(payload.live.error_breakdown[0]?.operations).toEqual([
+      {
+        operation: 'task_claim_file',
+        count: 1,
+        last_ts: now - 20_000,
+        error_message: 'task 1000 not found',
+      },
+      {
+        operation: 'task_post',
+        count: 1,
+        last_ts: now - 30_000,
+        error_message: 'task 999 not found',
+      },
+    ]);
+    expect(payload.live.error_breakdown[1]).toMatchObject({
+      error_code: 'SCOUT_NO_CLAIM',
+      count: 1,
+    });
+  });
+
   it('task_post reports a structured error for stale task ids', async () => {
     store.startSession({ id: 's-post', ide: 'test', cwd: '/tmp' });
 

diff --git a/...anges/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/.openspec.yaml b/...anges/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-05-15
diff --git a/...gent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/proposal.md b/...gent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/proposal.md
@@ -0,0 +1,22 @@
+## Why
+
+`savings_report` already exposes per-operation `error_reasons`, but operators
+who are diagnosing a noisy fleet need the inverse view: which error code is
+dominant across all operations, and which operations contribute to that code.
+Without that additive grouping, the caller has to fetch the whole live operation
+array and recompute counts client-side.
+
+## What Changes
+
+- Add `live.error_breakdown` to `savings_report`.
+- Group live mcp_metrics errors by `error_code`, preserving total count, latest
+  timestamp, and top contributing operations.
+- Keep the existing `live.operations[*].error_reasons` payload unchanged for
+  compatibility.
+
+## Impact
+
+- Affected surface: `apps/mcp-server/src/tools/savings.ts`.
+- The new field is additive JSON output and should not break existing clients.
+- Operation lists under each error code are bounded to keep the MCP response
+  compact.
diff --git a/...6-05-15-16-13/specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md b/...6-05-15-16-13/specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md
@@ -0,0 +1,15 @@
+## ADDED Requirements
+
+### Requirement: `savings_report` includes additive per-error-code breakdown
+The system SHALL include a `live.error_breakdown` array in `savings_report`
+responses that groups live mcp_metrics errors by `error_code` across all
+returned operations.
+
+#### Scenario: Error codes aggregate across operations
+- **WHEN** multiple operations fail with the same `error_code` in the requested savings window
+- **THEN** `live.error_breakdown` includes one row for that code with the summed count
+- **AND** that row lists contributing operations with their counts, latest timestamp, and latest message.
+
+#### Scenario: Existing per-operation error reasons remain available
+- **WHEN** `savings_report` returns the new `live.error_breakdown`
+- **THEN** existing `live.operations[*].error_reasons` fields remain unchanged.
diff --git a/...s/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md b/...s/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md
@@ -0,0 +1,36 @@
+## Definition of Done
+
+This change is complete only when **all** of the following are true:
+
+- Every checkbox below is checked.
+- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff.
+- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline.
+
+## Handoff
+
+- Handoff: change=`agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; branch=`agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; scope=`apps/mcp-server/src/tools/savings.ts`; action=`finish PR handoff after verification`.
+- Copy prompt: Continue `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13` on branch `agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`. Work inside the existing sandbox, review `openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --base main --via-pr --cleanup`.
+
+## 1. Specification
+
+- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`.
+- [x] 1.2 Define normative requirements in `specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md`.
+
+## 2. Implementation
+
+- [x] 2.1 Implement scoped behavior changes.
+- [x] 2.2 Add/update focused regression coverage.
+
+## 3. Verification
+
+- [x] 3.1 Run targeted project verification commands.
+  - `pnpm --filter @colony/mcp-server test -- server.test.ts`
+  - `pnpm --filter @colony/mcp-server typecheck`
+- [x] 3.2 Run `openspec validate agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --type change --strict`.
+- [x] 3.3 Run `openspec validate --specs`.
+
+## 4. Cleanup (mandatory; run before claiming completion)
+
+- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent/<your-name>/<branch-slug> --base dev --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation.
+- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff.
+- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch).