Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .changeset/metrics-wrapper-session-fallback.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
'@colony/mcp-server': patch
---

Attribute `<unknown>` session metrics via the MCP client-identity fallback

`colony gain` previously bucketed ~9,000 calls/day into a single
`<unknown>` session row. Cause: high-volume read-only tools
(`task_plan_list`, `get_observations`, `search`, `task_timeline`,
`list_sessions`, `examples_list`, …) carry no `session_id` in their
schema, so `metricContextOf` had nothing to attribute the call to.

The metrics wrapper now reuses the same `detectMcpClientIdentity` heuristic
the heartbeat wrapper already runs on every call: env-derived identity
(`CODEX_SESSION_ID`, `CLAUDECODE_SESSION_ID`, `COLONY_CLIENT_SESSION_ID`),
or a stable `mcp-<ppid>` fallback when no signal is available. The
explicit `args.session_id` / `args.current_session_id` path is unchanged;
the fallback only fires when both are absent.

Effect on the savings report: per-client session rows replace the giant
`<unknown>` bucket, making it possible to see which agent / connection is
driving the bulk of the load — the regression-investigation gap that
PR #531's compact-mode fix left open.

No new tool dependencies; the wrapper reads `detectMcpClientIdentity` from
`./heartbeat.js` (already in the same package).
20 changes: 16 additions & 4 deletions apps/mcp-server/src/tools/metrics-wrapper.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { countTokens } from '@colony/compress';
import type { MemoryStore } from '@colony/core';
import type { ToolHandlerWrapper } from './context.js';
import { detectMcpClientIdentity } from './heartbeat.js';

const TEXT_DECODER = new TextEncoder();

Expand Down Expand Up @@ -95,11 +96,22 @@ function isPromiseLike(value: unknown): value is Promise<unknown> {
}

function metricContextOf(value: unknown): Pick<MetricRecord, 'session_id' | 'repo_root'> {
if (!isRecord(value)) return {};
const sessionId = stringField(value.session_id) ?? stringField(value.current_session_id);
const repoRoot = stringField(value.repo_root);
const record = isRecord(value) ? value : undefined;
const sessionFromArgs = record
? (stringField(record.session_id) ?? stringField(record.current_session_id))
: undefined;
// High-volume read-only tools (task_plan_list, get_observations, search,
// task_timeline, list_sessions, examples_list, …) carry no session_id in
// their schema, which used to land every call in the `<unknown>` bucket of
// the savings report — masking ~9k calls/day in 2026-05-14 telemetry. Fall
// back to the same detectMcpClientIdentity heuristic the heartbeat wrapper
// already uses so receipts bucket per actual MCP client connection (codex
// sessions via CODEX_SESSION_ID env, claude via CLAUDECODE_SESSION_ID, etc.)
// instead of collapsing into one giant anonymous row.
const sessionId = sessionFromArgs ?? detectMcpClientIdentity(process.env, value).sessionId;
const repoRoot = record ? stringField(record.repo_root) : undefined;
const context: Pick<MetricRecord, 'session_id' | 'repo_root'> = {};
if (sessionId !== undefined) context.session_id = sessionId;
if (sessionId) context.session_id = sessionId;
if (repoRoot !== undefined) context.repo_root = repoRoot;
return context;
}
Expand Down
66 changes: 66 additions & 0 deletions apps/mcp-server/test/metrics-wrapper.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,70 @@ describe('metrics wrapper', () => {
const handler = wrap('search', async (_args: Record<string, never>) => 'still-ok');
await expect(handler({})).resolves.toBe('still-ok');
});

it('attributes the explicit args.session_id when the tool carries one', async () => {
const wrap = createMetricsWrapper(store);
const handler = wrap(
'task_ready_for_agent',
async (_args: { session_id: string; agent: string }) => 'ok',
);
await handler({ session_id: 'explicit-session-1', agent: 'codex' });

const agg = store.storage.aggregateMcpMetrics({ since: 0 });
const sessions = agg.sessions.map((row) => row.session_id);
expect(sessions).toContain('explicit-session-1');
expect(sessions).not.toContain('<unknown>');
});

it('falls back to the MCP client identity when the tool schema has no session_id', async () => {
// CODEX_SESSION_ID is the highest-confidence signal in
// detectMcpClientIdentity. Setting it on process.env simulates a real
// codex MCP client invoking a session-less tool like task_plan_list.
const previous = process.env.CODEX_SESSION_ID;
process.env.CODEX_SESSION_ID = 'codex-fallback-session';
try {
const wrap = createMetricsWrapper(store);
const handler = wrap('task_plan_list', async (_args: { repo_root?: string }) => 'ok');
// No session_id in args — old behaviour bucketed this as <unknown>.
await handler({ repo_root: '/tmp/repo' });

const agg = store.storage.aggregateMcpMetrics({ since: 0 });
const sessions = agg.sessions.map((row) => row.session_id);
expect(sessions).toContain('codex-fallback-session');
expect(sessions).not.toContain('<unknown>');
} finally {
if (previous === undefined) delete process.env.CODEX_SESSION_ID;
else process.env.CODEX_SESSION_ID = previous;
}
});

it('falls back to a stable mcp-<ppid> bucket when no env signal is available', async () => {
// Clear every env signal detectMcpClientIdentity reads so it can only
// produce the parent-pid fallback. Two calls with no signal should land
// in the SAME bucket (not <unknown>), which is what makes the savings
// report attributable across a session even when nothing else is set.
const restore: Record<string, string | undefined> = {
CODEX_SESSION_ID: process.env.CODEX_SESSION_ID,
CLAUDECODE_SESSION_ID: process.env.CLAUDECODE_SESSION_ID,
CLAUDE_SESSION_ID: process.env.CLAUDE_SESSION_ID,
COLONY_CLIENT_SESSION_ID: process.env.COLONY_CLIENT_SESSION_ID,
};
for (const k of Object.keys(restore)) delete process.env[k];
try {
const wrap = createMetricsWrapper(store);
const handler = wrap('task_plan_list', async (_args: { repo_root?: string }) => 'ok');
await handler({ repo_root: '/tmp/repo' });
await handler({ repo_root: '/tmp/repo' });

const agg = store.storage.aggregateMcpMetrics({ since: 0 });
const fallback = agg.sessions.find((row) => row.session_id === `mcp-${process.ppid}`);
expect(fallback?.calls).toBe(2);
expect(agg.sessions.map((row) => row.session_id)).not.toContain('<unknown>');
} finally {
for (const [k, v] of Object.entries(restore)) {
if (v === undefined) delete process.env[k];
else process.env[k] = v;
}
}
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
schema: spec-driven
created: 2026-05-14
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51 (minimal / T1)

Branch: `agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51`

The metrics wrapper now derives `session_id` for sessionless tools via the
same `detectMcpClientIdentity` heuristic the heartbeat already runs per
call. Eliminates the `<unknown>` bucket from `colony gain` for tools like
`task_plan_list` / `get_observations` / `search` / `task_timeline` whose
schemas don't carry `session_id`. Explicit `session_id` args still win;
fallback only fires when both `args.session_id` and
`args.current_session_id` are absent.

## Handoff

- Handoff: change=`agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51`; branch=`agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51`; scope=`apps/mcp-server only`; action=`finish via PR after user sign-off`.

## Cleanup

- [ ] Run: `gx branch finish --branch agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51 --base main --via-pr --wait-for-merge --cleanup`
- [ ] Record PR URL + `MERGED` state in the completion handoff.
- [ ] Confirm sandbox worktree is gone (`git worktree list`, `git branch -a`).
Loading