diff --git a/apps/mcp-server/src/tools/savings.ts b/apps/mcp-server/src/tools/savings.ts index 92798fc..49719ad 100644 --- a/apps/mcp-server/src/tools/savings.ts +++ b/apps/mcp-server/src/tools/savings.ts @@ -4,12 +4,29 @@ import { savingsLiveComparisonCost, savingsReferenceTotals, } from '@colony/core'; +import type { McpMetricsAggregateRow } from '@colony/storage'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; import { type ToolContext, defaultWrapHandler } from './context.js'; const DEFAULT_WINDOW_HOURS = 24; const HOUR_MS = 60 * 60_000; +const ERROR_BREAKDOWN_OPERATION_LIMIT = 8; + +interface SavingsErrorBreakdownOperation { + operation: string; + count: number; + last_ts: number | null; + error_message: string | null; +} + +interface SavingsErrorBreakdownRow { + error_code: string | null; + count: number; + last_ts: number | null; + operations: SavingsErrorBreakdownOperation[]; + operations_truncated: boolean; +} /** * Reports live per-operation token usage recorded by the metrics wrapper plus @@ -108,6 +125,7 @@ export function register(server: McpServer, ctx: ToolContext): void { ...(operation !== undefined ? { operation } : {}), cost_basis: live.cost_basis, totals: live.totals, + error_breakdown: buildErrorBreakdown(live.operations), operations: live.operations, session_summary: live.session_summary, sessions: live.sessions, @@ -159,3 +177,73 @@ function parseCostRate( const parsed = Number(fallback); return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined; } + +function buildErrorBreakdown( + rows: ReadonlyArray, +): SavingsErrorBreakdownRow[] { + const byCode = new Map< + string, + { + error_code: string | null; + count: number; + last_ts: number | null; + operations: Map; + } + >(); + + for (const row of rows) { + for (const reason of row.error_reasons) { + if (reason.count <= 0) continue; + const key = reason.error_code ?? ''; + let bucket = byCode.get(key); + if (bucket === undefined) { + bucket = { + error_code: reason.error_code, + count: 0, + last_ts: null, + operations: new Map(), + }; + byCode.set(key, bucket); + } + bucket.count += reason.count; + bucket.last_ts = maxTs(bucket.last_ts, reason.last_ts); + + const operation = bucket.operations.get(row.operation) ?? { + operation: row.operation, + count: 0, + last_ts: null, + error_message: null, + }; + operation.count += reason.count; + if ((reason.last_ts ?? 0) >= (operation.last_ts ?? 0)) { + operation.last_ts = reason.last_ts; + operation.error_message = reason.error_message; + } + bucket.operations.set(row.operation, operation); + } + } + + return [...byCode.values()] + .sort((a, b) => b.count - a.count || (b.last_ts ?? 0) - (a.last_ts ?? 0)) + .map((bucket) => { + const operations = [...bucket.operations.values()].sort( + (a, b) => + b.count - a.count || + (b.last_ts ?? 0) - (a.last_ts ?? 0) || + a.operation.localeCompare(b.operation), + ); + return { + error_code: bucket.error_code, + count: bucket.count, + last_ts: bucket.last_ts, + operations: operations.slice(0, ERROR_BREAKDOWN_OPERATION_LIMIT), + operations_truncated: operations.length > ERROR_BREAKDOWN_OPERATION_LIMIT, + }; + }); +} + +function maxTs(a: number | null, b: number | null): number | null { + if (a === null) return b; + if (b === null) return a; + return Math.max(a, b); +} diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 627e388..8d8ecea 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -331,6 +331,94 @@ describe('MCP server', () => { expect(payload.reference).toBeUndefined(); }); + it('savings_report returns an additive error_breakdown grouped by error code', async () => { + const now = Date.now(); + store.storage.recordMcpMetric({ + ts: now - 30_000, + operation: 'task_post', + input_bytes: 100, + output_bytes: 200, + input_tokens: 10, + output_tokens: 20, + duration_ms: 12, + ok: false, + error_code: 'TASK_NOT_FOUND', + error_message: 'task 999 not found', + }); + store.storage.recordMcpMetric({ + ts: now - 20_000, + operation: 'task_claim_file', + input_bytes: 100, + output_bytes: 200, + input_tokens: 15, + output_tokens: 25, + duration_ms: 10, + ok: false, + error_code: 'TASK_NOT_FOUND', + error_message: 'task 1000 not found', + }); + store.storage.recordMcpMetric({ + ts: now - 10_000, + operation: 'task_claim_file', + input_bytes: 100, + output_bytes: 200, + input_tokens: 5, + output_tokens: 10, + duration_ms: 9, + ok: false, + error_code: 'SCOUT_NO_CLAIM', + error_message: 'scout cannot claim files', + }); + + const res = await client.callTool({ + name: 'savings_report', + arguments: { hours: 1 }, + }); + const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'; + const payload = JSON.parse(text) as { + live: { + error_breakdown: Array<{ + error_code: string | null; + count: number; + last_ts: number | null; + operations_truncated: boolean; + operations: Array<{ + operation: string; + count: number; + last_ts: number | null; + error_message: string | null; + }>; + }>; + }; + }; + + expect(payload.live.error_breakdown).toHaveLength(2); + expect(payload.live.error_breakdown[0]).toMatchObject({ + error_code: 'TASK_NOT_FOUND', + count: 2, + last_ts: now - 20_000, + operations_truncated: false, + }); + expect(payload.live.error_breakdown[0]?.operations).toEqual([ + { + operation: 'task_claim_file', + count: 1, + last_ts: now - 20_000, + error_message: 'task 1000 not found', + }, + { + operation: 'task_post', + count: 1, + last_ts: now - 30_000, + error_message: 'task 999 not found', + }, + ]); + expect(payload.live.error_breakdown[1]).toMatchObject({ + error_code: 'SCOUT_NO_CLAIM', + count: 1, + }); + }); + it('task_post reports a structured error for stale task ids', async () => { store.startSession({ id: 's-post', ide: 'test', cwd: '/tmp' }); diff --git a/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/.openspec.yaml b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/.openspec.yaml new file mode 100644 index 0000000..9f70866 --- /dev/null +++ b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-15 diff --git a/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/proposal.md b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/proposal.md new file mode 100644 index 0000000..24b70fe --- /dev/null +++ b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/proposal.md @@ -0,0 +1,22 @@ +## Why + +`savings_report` already exposes per-operation `error_reasons`, but operators +who are diagnosing a noisy fleet need the inverse view: which error code is +dominant across all operations, and which operations contribute to that code. +Without that additive grouping, the caller has to fetch the whole live operation +array and recompute counts client-side. + +## What Changes + +- Add `live.error_breakdown` to `savings_report`. +- Group live mcp_metrics errors by `error_code`, preserving total count, latest + timestamp, and top contributing operations. +- Keep the existing `live.operations[*].error_reasons` payload unchanged for + compatibility. + +## Impact + +- Affected surface: `apps/mcp-server/src/tools/savings.ts`. +- The new field is additive JSON output and should not break existing clients. +- Operation lists under each error code are bounded to keep the MCP response + compact. diff --git a/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md new file mode 100644 index 0000000..5b3d42d --- /dev/null +++ b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md @@ -0,0 +1,15 @@ +## ADDED Requirements + +### Requirement: `savings_report` includes additive per-error-code breakdown +The system SHALL include a `live.error_breakdown` array in `savings_report` +responses that groups live mcp_metrics errors by `error_code` across all +returned operations. + +#### Scenario: Error codes aggregate across operations +- **WHEN** multiple operations fail with the same `error_code` in the requested savings window +- **THEN** `live.error_breakdown` includes one row for that code with the summed count +- **AND** that row lists contributing operations with their counts, latest timestamp, and latest message. + +#### Scenario: Existing per-operation error reasons remain available +- **WHEN** `savings_report` returns the new `live.error_breakdown` +- **THEN** existing `live.operations[*].error_reasons` fields remain unchanged. diff --git a/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md new file mode 100644 index 0000000..e2e7e42 --- /dev/null +++ b/openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md @@ -0,0 +1,36 @@ +## Definition of Done + +This change is complete only when **all** of the following are true: + +- Every checkbox below is checked. +- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff. +- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline. + +## Handoff + +- Handoff: change=`agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; branch=`agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; scope=`apps/mcp-server/src/tools/savings.ts`; action=`finish PR handoff after verification`. +- Copy prompt: Continue `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13` on branch `agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`. Work inside the existing sandbox, review `openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --base main --via-pr --cleanup`. + +## 1. Specification + +- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`. +- [x] 1.2 Define normative requirements in `specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md`. + +## 2. Implementation + +- [x] 2.1 Implement scoped behavior changes. +- [x] 2.2 Add/update focused regression coverage. + +## 3. Verification + +- [x] 3.1 Run targeted project verification commands. + - `pnpm --filter @colony/mcp-server test -- server.test.ts` + - `pnpm --filter @colony/mcp-server typecheck` +- [x] 3.2 Run `openspec validate agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --type change --strict`. +- [x] 3.3 Run `openspec validate --specs`. + +## 4. Cleanup (mandatory; run before claiming completion) + +- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation. +- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff. +- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch).