From 1b3a3e540990b4829c05d493a1df241d028abdde Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Thu, 14 May 2026 23:54:46 +0200 Subject: [PATCH] fix(mcp-server): attribute mcp_metrics session via client-identity fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `colony gain` was bucketing ~9,000 calls/day into a single `` session row because high-volume read-only tools (task_plan_list, get_observations, search, task_timeline, list_sessions, examples_list, …) have no `session_id` in their schema — `metricContextOf` had nothing to attribute the call to and recorded `session_id: null`, which the sessions aggregate SQL collapses into ``. The metrics wrapper now reuses the same `detectMcpClientIdentity` heuristic the heartbeat wrapper already runs per call: env-derived identity (CODEX_SESSION_ID, CLAUDECODE_SESSION_ID, COLONY_CLIENT_SESSION_ID) or the stable `mcp-` fallback when no signal is available. Explicit `args.session_id` / `args.current_session_id` still win — the fallback only fires when both are absent. Effect: per-client session rows replace the giant `` bucket in `colony gain` so the next regression investigation can see WHICH agent is driving the bulk of the load — the gap PR #531's compact-mode fix left open. Tests (apps/mcp-server/test/metrics-wrapper.test.ts): * explicit args.session_id still wins (regression guard) * sessionless tool + CODEX_SESSION_ID env → codex session bucket * sessionless tool + no env signal → stable `mcp-` bucket (two calls collapse into one row, not 2 `` rows) mcp-server suite: 276/276 PASS. Lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../metrics-wrapper-session-fallback.md | 26 ++++++++ apps/mcp-server/src/tools/metrics-wrapper.ts | 20 ++++-- apps/mcp-server/test/metrics-wrapper.test.ts | 66 +++++++++++++++++++ .../.openspec.yaml | 2 + .../notes.md | 21 ++++++ 5 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 .changeset/metrics-wrapper-session-fallback.md create mode 100644 openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/.openspec.yaml create mode 100644 openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/notes.md diff --git a/.changeset/metrics-wrapper-session-fallback.md b/.changeset/metrics-wrapper-session-fallback.md new file mode 100644 index 00000000..97f2388e --- /dev/null +++ b/.changeset/metrics-wrapper-session-fallback.md @@ -0,0 +1,26 @@ +--- +'@colony/mcp-server': patch +--- + +Attribute `` session metrics via the MCP client-identity fallback + +`colony gain` previously bucketed ~9,000 calls/day into a single +`` session row. Cause: high-volume read-only tools +(`task_plan_list`, `get_observations`, `search`, `task_timeline`, +`list_sessions`, `examples_list`, …) carry no `session_id` in their +schema, so `metricContextOf` had nothing to attribute the call to. + +The metrics wrapper now reuses the same `detectMcpClientIdentity` heuristic +the heartbeat wrapper already runs on every call: env-derived identity +(`CODEX_SESSION_ID`, `CLAUDECODE_SESSION_ID`, `COLONY_CLIENT_SESSION_ID`), +or a stable `mcp-` fallback when no signal is available. The +explicit `args.session_id` / `args.current_session_id` path is unchanged; +the fallback only fires when both are absent. + +Effect on the savings report: per-client session rows replace the giant +`` bucket, making it possible to see which agent / connection is +driving the bulk of the load — the regression-investigation gap that +PR #531's compact-mode fix left open. + +No new tool dependencies; the wrapper reads `detectMcpClientIdentity` from +`./heartbeat.js` (already in the same package). diff --git a/apps/mcp-server/src/tools/metrics-wrapper.ts b/apps/mcp-server/src/tools/metrics-wrapper.ts index 8e5a2453..8b1b5bf1 100644 --- a/apps/mcp-server/src/tools/metrics-wrapper.ts +++ b/apps/mcp-server/src/tools/metrics-wrapper.ts @@ -1,6 +1,7 @@ import { countTokens } from '@colony/compress'; import type { MemoryStore } from '@colony/core'; import type { ToolHandlerWrapper } from './context.js'; +import { detectMcpClientIdentity } from './heartbeat.js'; const TEXT_DECODER = new TextEncoder(); @@ -95,11 +96,22 @@ function isPromiseLike(value: unknown): value is Promise { } function metricContextOf(value: unknown): Pick { - if (!isRecord(value)) return {}; - const sessionId = stringField(value.session_id) ?? stringField(value.current_session_id); - const repoRoot = stringField(value.repo_root); + const record = isRecord(value) ? value : undefined; + const sessionFromArgs = record + ? (stringField(record.session_id) ?? stringField(record.current_session_id)) + : undefined; + // High-volume read-only tools (task_plan_list, get_observations, search, + // task_timeline, list_sessions, examples_list, …) carry no session_id in + // their schema, which used to land every call in the `` bucket of + // the savings report — masking ~9k calls/day in 2026-05-14 telemetry. Fall + // back to the same detectMcpClientIdentity heuristic the heartbeat wrapper + // already uses so receipts bucket per actual MCP client connection (codex + // sessions via CODEX_SESSION_ID env, claude via CLAUDECODE_SESSION_ID, etc.) + // instead of collapsing into one giant anonymous row. + const sessionId = sessionFromArgs ?? detectMcpClientIdentity(process.env, value).sessionId; + const repoRoot = record ? stringField(record.repo_root) : undefined; const context: Pick = {}; - if (sessionId !== undefined) context.session_id = sessionId; + if (sessionId) context.session_id = sessionId; if (repoRoot !== undefined) context.repo_root = repoRoot; return context; } diff --git a/apps/mcp-server/test/metrics-wrapper.test.ts b/apps/mcp-server/test/metrics-wrapper.test.ts index 274b8e32..c2d7efa1 100644 --- a/apps/mcp-server/test/metrics-wrapper.test.ts +++ b/apps/mcp-server/test/metrics-wrapper.test.ts @@ -101,4 +101,70 @@ describe('metrics wrapper', () => { const handler = wrap('search', async (_args: Record) => 'still-ok'); await expect(handler({})).resolves.toBe('still-ok'); }); + + it('attributes the explicit args.session_id when the tool carries one', async () => { + const wrap = createMetricsWrapper(store); + const handler = wrap( + 'task_ready_for_agent', + async (_args: { session_id: string; agent: string }) => 'ok', + ); + await handler({ session_id: 'explicit-session-1', agent: 'codex' }); + + const agg = store.storage.aggregateMcpMetrics({ since: 0 }); + const sessions = agg.sessions.map((row) => row.session_id); + expect(sessions).toContain('explicit-session-1'); + expect(sessions).not.toContain(''); + }); + + it('falls back to the MCP client identity when the tool schema has no session_id', async () => { + // CODEX_SESSION_ID is the highest-confidence signal in + // detectMcpClientIdentity. Setting it on process.env simulates a real + // codex MCP client invoking a session-less tool like task_plan_list. + const previous = process.env.CODEX_SESSION_ID; + process.env.CODEX_SESSION_ID = 'codex-fallback-session'; + try { + const wrap = createMetricsWrapper(store); + const handler = wrap('task_plan_list', async (_args: { repo_root?: string }) => 'ok'); + // No session_id in args — old behaviour bucketed this as . + await handler({ repo_root: '/tmp/repo' }); + + const agg = store.storage.aggregateMcpMetrics({ since: 0 }); + const sessions = agg.sessions.map((row) => row.session_id); + expect(sessions).toContain('codex-fallback-session'); + expect(sessions).not.toContain(''); + } finally { + if (previous === undefined) delete process.env.CODEX_SESSION_ID; + else process.env.CODEX_SESSION_ID = previous; + } + }); + + it('falls back to a stable mcp- bucket when no env signal is available', async () => { + // Clear every env signal detectMcpClientIdentity reads so it can only + // produce the parent-pid fallback. Two calls with no signal should land + // in the SAME bucket (not ), which is what makes the savings + // report attributable across a session even when nothing else is set. + const restore: Record = { + CODEX_SESSION_ID: process.env.CODEX_SESSION_ID, + CLAUDECODE_SESSION_ID: process.env.CLAUDECODE_SESSION_ID, + CLAUDE_SESSION_ID: process.env.CLAUDE_SESSION_ID, + COLONY_CLIENT_SESSION_ID: process.env.COLONY_CLIENT_SESSION_ID, + }; + for (const k of Object.keys(restore)) delete process.env[k]; + try { + const wrap = createMetricsWrapper(store); + const handler = wrap('task_plan_list', async (_args: { repo_root?: string }) => 'ok'); + await handler({ repo_root: '/tmp/repo' }); + await handler({ repo_root: '/tmp/repo' }); + + const agg = store.storage.aggregateMcpMetrics({ since: 0 }); + const fallback = agg.sessions.find((row) => row.session_id === `mcp-${process.ppid}`); + expect(fallback?.calls).toBe(2); + expect(agg.sessions.map((row) => row.session_id)).not.toContain(''); + } finally { + for (const [k, v] of Object.entries(restore)) { + if (v === undefined) delete process.env[k]; + else process.env[k] = v; + } + } + }); }); diff --git a/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/.openspec.yaml b/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/.openspec.yaml new file mode 100644 index 00000000..66dd08a9 --- /dev/null +++ b/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-14 diff --git a/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/notes.md b/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/notes.md new file mode 100644 index 00000000..39d9ac35 --- /dev/null +++ b/openspec/changes/agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51/notes.md @@ -0,0 +1,21 @@ +# agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51 (minimal / T1) + +Branch: `agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51` + +The metrics wrapper now derives `session_id` for sessionless tools via the +same `detectMcpClientIdentity` heuristic the heartbeat already runs per +call. Eliminates the `` bucket from `colony gain` for tools like +`task_plan_list` / `get_observations` / `search` / `task_timeline` whose +schemas don't carry `session_id`. Explicit `session_id` args still win; +fallback only fires when both `args.session_id` and +`args.current_session_id` are absent. + +## Handoff + +- Handoff: change=`agent-claude-metrics-wrapper-session-attribution-via-2026-05-14-23-51`; branch=`agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51`; scope=`apps/mcp-server only`; action=`finish via PR after user sign-off`. + +## Cleanup + +- [ ] Run: `gx branch finish --branch agent/claude/metrics-wrapper-session-attribution-via-2026-05-14-23-51 --base main --via-pr --wait-for-merge --cleanup` +- [ ] Record PR URL + `MERGED` state in the completion handoff. +- [ ] Confirm sandbox worktree is gone (`git worktree list`, `git branch -a`).