Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions apps/mcp-server/src/tools/savings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,29 @@ import {
savingsLiveComparisonCost,
savingsReferenceTotals,
} from '@colony/core';
import type { McpMetricsAggregateRow } from '@colony/storage';
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import { type ToolContext, defaultWrapHandler } from './context.js';

const DEFAULT_WINDOW_HOURS = 24;
const HOUR_MS = 60 * 60_000;
const ERROR_BREAKDOWN_OPERATION_LIMIT = 8;

interface SavingsErrorBreakdownOperation {
operation: string;
count: number;
last_ts: number | null;
error_message: string | null;
}

interface SavingsErrorBreakdownRow {
error_code: string | null;
count: number;
last_ts: number | null;
operations: SavingsErrorBreakdownOperation[];
operations_truncated: boolean;
}

/**
* Reports live per-operation token usage recorded by the metrics wrapper plus
Expand Down Expand Up @@ -108,6 +125,7 @@ export function register(server: McpServer, ctx: ToolContext): void {
...(operation !== undefined ? { operation } : {}),
cost_basis: live.cost_basis,
totals: live.totals,
error_breakdown: buildErrorBreakdown(live.operations),
operations: live.operations,
session_summary: live.session_summary,
sessions: live.sessions,
Expand Down Expand Up @@ -159,3 +177,73 @@ function parseCostRate(
const parsed = Number(fallback);
return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
}

function buildErrorBreakdown(
rows: ReadonlyArray<McpMetricsAggregateRow>,
): SavingsErrorBreakdownRow[] {
const byCode = new Map<
string,
{
error_code: string | null;
count: number;
last_ts: number | null;
operations: Map<string, SavingsErrorBreakdownOperation>;
}
>();

for (const row of rows) {
for (const reason of row.error_reasons) {
if (reason.count <= 0) continue;
const key = reason.error_code ?? '<null>';
let bucket = byCode.get(key);
if (bucket === undefined) {
bucket = {
error_code: reason.error_code,
count: 0,
last_ts: null,
operations: new Map(),
};
byCode.set(key, bucket);
}
bucket.count += reason.count;
bucket.last_ts = maxTs(bucket.last_ts, reason.last_ts);

const operation = bucket.operations.get(row.operation) ?? {
operation: row.operation,
count: 0,
last_ts: null,
error_message: null,
};
operation.count += reason.count;
if ((reason.last_ts ?? 0) >= (operation.last_ts ?? 0)) {
operation.last_ts = reason.last_ts;
operation.error_message = reason.error_message;
}
bucket.operations.set(row.operation, operation);
}
}

return [...byCode.values()]
.sort((a, b) => b.count - a.count || (b.last_ts ?? 0) - (a.last_ts ?? 0))
.map((bucket) => {
const operations = [...bucket.operations.values()].sort(
(a, b) =>
b.count - a.count ||
(b.last_ts ?? 0) - (a.last_ts ?? 0) ||
a.operation.localeCompare(b.operation),
);
return {
error_code: bucket.error_code,
count: bucket.count,
last_ts: bucket.last_ts,
operations: operations.slice(0, ERROR_BREAKDOWN_OPERATION_LIMIT),
operations_truncated: operations.length > ERROR_BREAKDOWN_OPERATION_LIMIT,
};
});
}

function maxTs(a: number | null, b: number | null): number | null {
if (a === null) return b;
if (b === null) return a;
return Math.max(a, b);
}
88 changes: 88 additions & 0 deletions apps/mcp-server/test/server.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,94 @@ describe('MCP server', () => {
expect(payload.reference).toBeUndefined();
});

it('savings_report returns an additive error_breakdown grouped by error code', async () => {
const now = Date.now();
store.storage.recordMcpMetric({
ts: now - 30_000,
operation: 'task_post',
input_bytes: 100,
output_bytes: 200,
input_tokens: 10,
output_tokens: 20,
duration_ms: 12,
ok: false,
error_code: 'TASK_NOT_FOUND',
error_message: 'task 999 not found',
});
store.storage.recordMcpMetric({
ts: now - 20_000,
operation: 'task_claim_file',
input_bytes: 100,
output_bytes: 200,
input_tokens: 15,
output_tokens: 25,
duration_ms: 10,
ok: false,
error_code: 'TASK_NOT_FOUND',
error_message: 'task 1000 not found',
});
store.storage.recordMcpMetric({
ts: now - 10_000,
operation: 'task_claim_file',
input_bytes: 100,
output_bytes: 200,
input_tokens: 5,
output_tokens: 10,
duration_ms: 9,
ok: false,
error_code: 'SCOUT_NO_CLAIM',
error_message: 'scout cannot claim files',
});

const res = await client.callTool({
name: 'savings_report',
arguments: { hours: 1 },
});
const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}';
const payload = JSON.parse(text) as {
live: {
error_breakdown: Array<{
error_code: string | null;
count: number;
last_ts: number | null;
operations_truncated: boolean;
operations: Array<{
operation: string;
count: number;
last_ts: number | null;
error_message: string | null;
}>;
}>;
};
};

expect(payload.live.error_breakdown).toHaveLength(2);
expect(payload.live.error_breakdown[0]).toMatchObject({
error_code: 'TASK_NOT_FOUND',
count: 2,
last_ts: now - 20_000,
operations_truncated: false,
});
expect(payload.live.error_breakdown[0]?.operations).toEqual([
{
operation: 'task_claim_file',
count: 1,
last_ts: now - 20_000,
error_message: 'task 1000 not found',
},
{
operation: 'task_post',
count: 1,
last_ts: now - 30_000,
error_message: 'task 999 not found',
},
]);
expect(payload.live.error_breakdown[1]).toMatchObject({
error_code: 'SCOUT_NO_CLAIM',
count: 1,
});
});

it('task_post reports a structured error for stale task ids', async () => {
store.startSession({ id: 's-post', ide: 'test', cwd: '/tmp' });

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
schema: spec-driven
created: 2026-05-15
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## Why

`savings_report` already exposes per-operation `error_reasons`, but operators
who are diagnosing a noisy fleet need the inverse view: which error code is
dominant across all operations, and which operations contribute to that code.
Without that additive grouping, the caller has to fetch the whole live operation
array and recompute counts client-side.

## What Changes

- Add `live.error_breakdown` to `savings_report`.
- Group live mcp_metrics errors by `error_code`, preserving total count, latest
timestamp, and top contributing operations.
- Keep the existing `live.operations[*].error_reasons` payload unchanged for
compatibility.

## Impact

- Affected surface: `apps/mcp-server/src/tools/savings.ts`.
- The new field is additive JSON output and should not break existing clients.
- Operation lists under each error code are bounded to keep the MCP response
compact.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
## ADDED Requirements

### Requirement: `savings_report` includes additive per-error-code breakdown
The system SHALL include a `live.error_breakdown` array in `savings_report`
responses that groups live mcp_metrics errors by `error_code` across all
returned operations.

#### Scenario: Error codes aggregate across operations
- **WHEN** multiple operations fail with the same `error_code` in the requested savings window
- **THEN** `live.error_breakdown` includes one row for that code with the summed count
- **AND** that row lists contributing operations with their counts, latest timestamp, and latest message.

#### Scenario: Existing per-operation error reasons remain available
- **WHEN** `savings_report` returns the new `live.error_breakdown`
- **THEN** existing `live.operations[*].error_reasons` fields remain unchanged.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
## Definition of Done

This change is complete only when **all** of the following are true:

- Every checkbox below is checked.
- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff.
- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline.

## Handoff

- Handoff: change=`agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; branch=`agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`; scope=`apps/mcp-server/src/tools/savings.ts`; action=`finish PR handoff after verification`.
- Copy prompt: Continue `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13` on branch `agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`. Work inside the existing sandbox, review `openspec/changes/agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --base main --via-pr --cleanup`.

## 1. Specification

- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13`.
- [x] 1.2 Define normative requirements in `specs/per-error-code-breakdown-in-mcp-metrics-aggregation/spec.md`.

## 2. Implementation

- [x] 2.1 Implement scoped behavior changes.
- [x] 2.2 Add/update focused regression coverage.

## 3. Verification

- [x] 3.1 Run targeted project verification commands.
- `pnpm --filter @colony/mcp-server test -- server.test.ts`
- `pnpm --filter @colony/mcp-server typecheck`
- [x] 3.2 Run `openspec validate agent-codex-per-error-code-breakdown-in-mcp-metrics-2026-05-15-16-13 --type change --strict`.
- [x] 3.3 Run `openspec validate --specs`.

## 4. Cleanup (mandatory; run before claiming completion)

- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent/<your-name>/<branch-slug> --base dev --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation.
- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff.
- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch).
Loading