diff --git a/.changeset/gain-drift-detector.md b/.changeset/gain-drift-detector.md new file mode 100644 index 0000000..9bcdda8 --- /dev/null +++ b/.changeset/gain-drift-detector.md @@ -0,0 +1,21 @@ +--- +'colonyq': minor +'@colony/storage': minor +'@colony/core': minor +'@colony/mcp-server': minor +--- + +`colony gain drift` and a matching `savings_drift_report` MCP tool flag +tools whose median tokens-per-call has drifted up or down. Default windows +are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days +before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold +0.75`, `--min-calls 20` per window. Classifications: `up_drift`, +`down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, +`stable`. + +Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation +medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` +window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` +form because SQLite forbids outer aggregate references in scalar-subquery +`OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the +baseline window starts before the first recorded metric. 
diff --git a/apps/cli/src/commands/gain.ts b/apps/cli/src/commands/gain.ts index 3864a6e..4685957 100644 --- a/apps/cli/src/commands/gain.ts +++ b/apps/cli/src/commands/gain.ts @@ -5,6 +5,8 @@ import { type SavingsLiveComparisonCost, type SavingsReferenceRow, type SavingsReferenceTotals, + classifyDrift, + type DriftReport, savingsLiveComparison, savingsLiveComparisonCost, savingsReferenceTotals, @@ -40,6 +42,16 @@ interface GainOptions { topOps?: string; } +interface GainDriftOptions { + json?: boolean; + baselineDays?: string; + recentDays?: string; + minCalls?: string; + threshold?: string; + downThreshold?: string; + operation?: string; +} + export interface MoverRow { operation: string; recent_calls: number; @@ -78,7 +90,7 @@ interface TopErrorReason { } export function registerGainCommand(program: Command): void { - program + const gain = program .command('gain') .description('Show colony token/cost savings from live mcp_metrics receipts') .option('--json', 'emit structured JSON') @@ -250,6 +262,91 @@ export function registerGainCommand(program: Command): void { movers, ); }); + + gain + .command('drift') + .description( + 'Flag tools whose median tokens-per-call has drifted vs a baseline window (no schema change)', + ) + .option('--baseline-days <days>', 'baseline window length in days (default 14)') + .option('--recent-days <days>', 'recent window length in days (default 3)') + .option('--min-calls <n>', 'minimum sample size per window to trust signal (default 20)') + .option('--threshold <ratio>', 'up-drift trigger ratio (default 1.25 = +25%)') + .option('--down-threshold <ratio>', 'down-drift trigger ratio (default 0.75 = -25%)') + .option('--operation <name>', 'show only this operation row in the table') + .option('--json', 'emit structured JSON') + .action(async (opts: GainDriftOptions) => { + const settings = loadSettings(); + const baselineDays = parsePositiveFloat(opts.baselineDays) ?? 14; + const recentDays = parsePositiveFloat(opts.recentDays) ?? 
3; + const minCalls = parsePositiveInt(opts.minCalls) ?? 20; + const threshold = parsePositiveFloat(opts.threshold) ?? 1.25; + const downThreshold = parsePositiveFloat(opts.downThreshold) ?? 0.75; + + const now = Date.now(); + const recentSince = now - recentDays * 24 * 60 * 60_000; + // 3-day gap between recent and baseline so day-of-week noise does not + // bleed across — see spec/v0.x roadmap. + const baselineUntil = recentSince - 3 * 24 * 60 * 60_000; + const baselineSince = baselineUntil - baselineDays * 24 * 60 * 60_000; + const recentUntil = now; + + const { rawRows, minTs } = await withStorage( + settings, + (storage) => { + const allRows = storage.mcpTokenDriftPerOperation({ + baseline_since: baselineSince, + baseline_until: baselineUntil, + recent_since: recentSince, + recent_until: recentUntil, + }); + const filtered = + opts.operation !== undefined + ? allRows.filter((row) => row.operation === opts.operation) + : allRows; + return { rawRows: filtered, minTs: storage.mcpMetricsMinTs() }; + }, + { readonly: true }, + ); + + const report = classifyDrift(rawRows, { + threshold, + down_threshold: downThreshold, + min_calls: minCalls, + }); + + const baselineWarning = + minTs !== null && minTs > baselineSince + ? `baseline window starts before first recorded metric — drift detection needs ~${ + Math.ceil((recentDays + baselineDays + 3) - (now - minTs) / (24 * 60 * 60_000)) + } more day${baselineDays > 1 ? 's' : ''} of history` + : null; + + if (opts.json === true) { + const payload = { + window: { + baseline_since: baselineSince, + baseline_until: baselineUntil, + recent_since: recentSince, + recent_until: recentUntil, + }, + threshold: report.threshold, + rows: report.rows, + new_tools: report.new_tools, + gone_tools: report.gone_tools, + insufficient_data: report.insufficient_data, + ...(baselineWarning !== null ? 
{ warning: baselineWarning } : {}), + }; + process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`); + return; + } + + writeDriftReport(report, { + recentDays, + baselineDays, + baselineWarning, + }); + }); } export function writeGainReport( @@ -1175,6 +1272,12 @@ function parsePositiveInt(raw: string | undefined): number | undefined { return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : undefined; } +function parsePositiveFloat(raw: string | undefined): number | undefined { + if (raw === undefined || raw.trim() === '') return undefined; + const parsed = Number(raw); + return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined; +} + // rtk-style proportional bar. The row whose value equals `max` gets a full // bar; smaller rows scale linearly. Empty when max <= 0. export function renderImpactBar(value: number, max: number, width: number): string { @@ -1586,3 +1689,132 @@ function colorByEfficiency(pct: number, text: string): string { if (pct >= 40) return kleur.yellow().bold(text); return kleur.red().bold(text); } + +export interface DriftReportInput { + recentDays: number; + baselineDays: number; + baselineWarning: string | null; +} + +// Plaintext rendering for `colony gain drift`. Mirrors the gain layout +// (kleur-colored, padded columns). Drift-classified rows print first; +// new/gone/insufficient sets get one-line summaries underneath so a +// quick scan answers "is anything regressing?" without re-running. 
+export function writeDriftReport(report: DriftReport, input: DriftReportInput): void { + const w = process.stdout; + const { recentDays, baselineDays, baselineWarning } = input; + w.write( + `${kleur.bold( + `colony gain drift (recent ${formatDaysLabel(recentDays)} vs baseline ${formatDaysLabel( + baselineDays, + )})`, + )}\n`, + ); + w.write( + kleur.dim( + `Thresholds: up >= ${report.threshold.up.toFixed(2)}x, down <= ${report.threshold.down.toFixed( + 2, + )}x, min ${report.threshold.min_calls} calls per window.\n`, + ), + ); + if (baselineWarning !== null) { + w.write(`${kleur.yellow('[warn] ')}${baselineWarning}\n`); + } + const tableRows = report.rows.filter( + (row) => + row.classification === 'up_drift' || + row.classification === 'down_drift' || + row.classification === 'stable', + ); + if (tableRows.length === 0 && report.new_tools.length === 0 && report.gone_tools.length === 0) { + w.write(kleur.dim('No operations had enough samples in both windows.\n')); + if (report.insufficient_data.length > 0) { + writeDriftInsufficient(report); + } + return; + } + if (tableRows.length > 0) { + const widths = [24, 13, 11, 8, 7, 7, 18]; + const head = padRow( + ['Operation', 'Baseline med', 'Recent med', 'Ratio', 'n_base', 'n_rec', 'Class'], + widths, + ); + w.write(`${kleur.dim(head)}\n`); + // Up-drift first (most urgent), then down, then stable. Within each + // bucket keep the storage-emitted alphabetical order so output is + // deterministic for tests. + const ordered = [ + ...tableRows.filter((row) => row.classification === 'up_drift'), + ...tableRows.filter((row) => row.classification === 'down_drift'), + ...tableRows.filter((row) => row.classification === 'stable'), + ]; + for (const row of ordered) { + const cells = [ + truncate(row.operation, widths[0] ?? 24), + formatTokens(row.baseline_median ?? 0), + formatTokens(row.recent_median ?? 
0), + formatDriftRatio(row.ratio, row.classification), + formatInt(row.baseline_n), + formatInt(row.recent_n), + formatDriftClass(row.classification), + ]; + w.write(`${padRow(cells, widths)}\n`); + } + } + if (report.insufficient_data.length > 0) { + writeDriftInsufficient(report); + } + if (report.new_tools.length > 0) { + w.write( + `${kleur.dim('New tools (no baseline):')} ${report.new_tools.join(', ')}\n`, + ); + } + if (report.gone_tools.length > 0) { + w.write( + `${kleur.dim('Gone tools (no recent calls):')} ${report.gone_tools.join(', ')}\n`, + ); + } +} + +function writeDriftInsufficient(report: DriftReport): void { + const names = report.insufficient_data + .map((row) => row.operation) + .slice(0, 12) + .join(', '); + const more = report.insufficient_data.length > 12 + ? `, +${report.insufficient_data.length - 12} more` + : ''; + process.stdout.write( + `${kleur.dim(`Insufficient data (n<${report.threshold.min_calls}):`)} ${names}${more}\n`, + ); +} + +function formatDriftRatio(ratio: number | null, classification: DriftReport['rows'][number]['classification']): string { + if (ratio === null) return '-'; + const rounded = ratio >= 10 ? 
ratio.toFixed(1) : ratio.toFixed(2); + if (classification === 'up_drift') return kleur.red(`▲${rounded}x`); + if (classification === 'down_drift') return kleur.green(`▼${rounded}x`); + return `${rounded}x`; +} + +function formatDriftClass(classification: DriftReport['rows'][number]['classification']): string { + switch (classification) { + case 'up_drift': + return kleur.red('up_drift'); + case 'down_drift': + return kleur.green('down_drift'); + case 'stable': + return kleur.dim('stable'); + case 'new_tool': + return kleur.cyan('new_tool'); + case 'gone': + return kleur.dim('gone'); + case 'insufficient_data': + return kleur.dim('insufficient'); + } +} + +function formatDaysLabel(days: number): string { + if (Number.isInteger(days)) return `${days}d`; + return `${days.toFixed(1)}d`; +} diff --git a/apps/cli/test/gain-drift.test.ts b/apps/cli/test/gain-drift.test.ts new file mode 100644 index 0000000..b1a394d --- /dev/null +++ b/apps/cli/test/gain-drift.test.ts @@ -0,0 +1,175 @@ +import { classifyDrift, type DriftRawRow } from '@colony/core'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { writeDriftReport } from '../src/commands/gain.js'; + +// kleur emits ANSI when COLORTERM is set (Anthropic harness frequently +// flips this on). Strip escapes so substring checks hold regardless of +// the local color mode. 
+const ANSI_RE = new RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, 'g'); +const stripAnsi = (chunk: string | Uint8Array): string => String(chunk).replace(ANSI_RE, ''); + +function capture(): { read: () => string } { + let buf = ''; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk: string | Uint8Array) => { + buf += stripAnsi(chunk); + return true; + }); + return { read: () => buf }; +} + +describe('writeDriftReport', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('renders header, threshold note, and an up-drift row first', () => { + const rows: DriftRawRow[] = [ + { + operation: 'search', + baseline_median: 400, + baseline_n: 2_140, + recent_median: 580, + recent_n: 412, + }, + { + operation: 'task_post', + baseline_median: 180, + baseline_n: 980, + recent_median: 182, + recent_n: 198, + }, + ]; + const report = classifyDrift(rows, { + threshold: 1.25, + down_threshold: 0.75, + min_calls: 20, + }); + const out = capture(); + writeDriftReport(report, { recentDays: 3, baselineDays: 14, baselineWarning: null }); + const text = out.read(); + + expect(text).toContain('colony gain drift (recent 3d vs baseline 14d)'); + expect(text).toContain('Thresholds: up >= 1.25x, down <= 0.75x, min 20 calls'); + expect(text).toContain('Operation'); + expect(text).toContain('Baseline med'); + expect(text).toContain('Class'); + // up_drift row appears before stable row. 
+ const searchIdx = text.indexOf('search'); + const taskIdx = text.indexOf('task_post'); + expect(searchIdx).toBeGreaterThan(-1); + expect(taskIdx).toBeGreaterThan(searchIdx); + expect(text).toContain('up_drift'); + expect(text).toContain('stable'); + }); + + it('lists new tools, gone tools, and insufficient data under the table', () => { + const rows: DriftRawRow[] = [ + { + operation: 'savings_drift_report', + baseline_median: null, + baseline_n: 0, + recent_median: 220, + recent_n: 25, + }, + { + operation: 'legacy_search', + baseline_median: 300, + baseline_n: 50, + recent_median: null, + recent_n: 0, + }, + { + operation: 'suggest', + baseline_median: 80, + baseline_n: 3, + recent_median: 88, + recent_n: 18, + }, + ]; + const report = classifyDrift(rows, { + threshold: 1.25, + down_threshold: 0.75, + min_calls: 20, + }); + const out = capture(); + writeDriftReport(report, { recentDays: 3, baselineDays: 14, baselineWarning: null }); + const text = out.read(); + expect(text).toContain('New tools (no baseline): savings_drift_report'); + expect(text).toContain('Gone tools (no recent calls): legacy_search'); + expect(text).toContain('Insufficient data (n<20): suggest'); + }); + + it('emits a [warn] line when the baseline window predates the first receipt', () => { + const report = classifyDrift([], { + threshold: 1.25, + down_threshold: 0.75, + min_calls: 20, + }); + const out = capture(); + writeDriftReport(report, { + recentDays: 3, + baselineDays: 14, + baselineWarning: + 'baseline window starts before first recorded metric — drift detection needs ~5 more days of history', + }); + const text = out.read(); + expect(text).toContain('[warn]'); + expect(text).toContain('baseline window starts before first recorded metric'); + }); + + it('classifies a down-drift row separately and renders down_drift class', () => { + const rows: DriftRawRow[] = [ + { + operation: 'task_post', + baseline_median: 600, + baseline_n: 30, + recent_median: 300, + recent_n: 30, + }, + ]; + 
const report = classifyDrift(rows, { + threshold: 1.25, + down_threshold: 0.75, + min_calls: 20, + }); + expect(report.rows[0]?.classification).toBe('down_drift'); + expect(report.rows[0]?.ratio).toBeCloseTo(0.5, 6); + const out = capture(); + writeDriftReport(report, { recentDays: 3, baselineDays: 14, baselineWarning: null }); + expect(out.read()).toContain('down_drift'); + }); + + it('classifyDrift round-trips JSON-friendly shape', () => { + const rows: DriftRawRow[] = [ + { + operation: 'search', + baseline_median: 400, + baseline_n: 100, + recent_median: 600, + recent_n: 100, + }, + ]; + const report = classifyDrift(rows, { + threshold: 1.25, + down_threshold: 0.75, + min_calls: 20, + }); + expect(JSON.parse(JSON.stringify(report))).toEqual({ + threshold: { up: 1.25, down: 0.75, min_calls: 20 }, + rows: [ + { + operation: 'search', + baseline_median: 400, + baseline_n: 100, + recent_median: 600, + recent_n: 100, + ratio: 1.5, + classification: 'up_drift', + }, + ], + new_tools: [], + gone_tools: [], + insufficient_data: [], + }); + }); +}); diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts index 93138b7..b71aee6 100644 --- a/apps/mcp-server/src/server.ts +++ b/apps/mcp-server/src/server.ts @@ -29,6 +29,7 @@ import * as recall from './tools/recall.js'; import * as relay from './tools/relay.js'; import * as rescue from './tools/rescue.js'; import * as savings from './tools/savings.js'; +import * as savingsDrift from './tools/savings-drift.js'; import * as search from './tools/search.js'; import * as spec from './tools/spec.js'; import * as startupPanel from './tools/startup-panel.js'; @@ -120,6 +121,7 @@ export function buildServer( suggest.register(server, ctx); rescue.register(server, ctx); savings.register(server, ctx); + savingsDrift.register(server, ctx); // Autopilot lane (tick advisor + drift checker). 
Cheap compositions of // existing primitives; registered after the core surface so the heartbeat diff --git a/apps/mcp-server/src/tools/savings-drift.ts b/apps/mcp-server/src/tools/savings-drift.ts new file mode 100644 index 0000000..e68a073 --- /dev/null +++ b/apps/mcp-server/src/tools/savings-drift.ts @@ -0,0 +1,136 @@ +import { classifyDrift } from '@colony/core'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import { type ToolContext, defaultWrapHandler } from './context.js'; + +const DAY_MS = 24 * 60 * 60_000; + +/** + * Reports per-operation tokens-per-call drift across two non-overlapping + * windows (recent vs baseline) using only the existing mcp_metrics table. + * No schema change. + * + * Progressive disclosure: the response is the classified rows plus three + * one-line summary arrays (new_tools, gone_tools, insufficient_data) — no + * observation bodies, no per-call samples. Callers reach for + * savings_report for the wider per-operation view. + * + * Lives in its own file (rather than tools/drift.ts which already owns + * task_drift_check) so the file-edit-drift checker and the token-drift + * detector keep separate module surfaces. + */ +export function register(server: McpServer, ctx: ToolContext): void { + const wrapHandler = ctx.wrapHandler ?? defaultWrapHandler; + const { store } = ctx; + + server.tool( + 'savings_drift_report', + 'Flag tools whose median tokens-per-call has drifted up or down over a recent window vs a baseline window. Pure read path over mcp_metrics; no schema change. 
Pass baseline_days/recent_days to scope the windows, min_calls to set the sample-size guard, and threshold/down_threshold to tune up/down sensitivity.', + { + baseline_days: z + .number() + .positive() + .max(180) + .optional() + .describe('baseline window length in days; defaults to 14'), + recent_days: z + .number() + .positive() + .max(60) + .optional() + .describe('recent window length in days; defaults to 3'), + min_calls: z + .number() + .int() + .min(1) + .max(10_000) + .optional() + .describe('minimum sample size per window before flagging drift; defaults to 20'), + threshold: z + .number() + .positive() + .optional() + .describe('up-drift trigger ratio (recent_median / baseline_median); defaults to 1.25'), + down_threshold: z + .number() + .positive() + .optional() + .describe('down-drift trigger ratio; defaults to 0.75'), + operation: z + .string() + .min(1) + .optional() + .describe('filter rows by exact operation name (e.g. "search")'), + }, + wrapHandler( + 'savings_drift_report', + async ({ + baseline_days, + recent_days, + min_calls, + threshold, + down_threshold, + operation, + }) => { + const baselineDays = baseline_days ?? 14; + const recentDays = recent_days ?? 3; + const minCalls = min_calls ?? 20; + const up = threshold ?? 1.25; + const down = down_threshold ?? 0.75; + + const now = Date.now(); + const recentSince = now - recentDays * DAY_MS; + // 3-day gap mirrors the CLI: keeps day-of-week noise from bleeding + // across windows. + const baselineUntil = recentSince - 3 * DAY_MS; + const baselineSince = baselineUntil - baselineDays * DAY_MS; + const recentUntil = now; + + const allRows = store.storage.mcpTokenDriftPerOperation({ + baseline_since: baselineSince, + baseline_until: baselineUntil, + recent_since: recentSince, + recent_until: recentUntil, + }); + const filtered = + operation !== undefined + ? 
allRows.filter((row) => row.operation === operation) + : allRows; + + const report = classifyDrift(filtered, { + threshold: up, + down_threshold: down, + min_calls: minCalls, + }); + + const minTs = store.storage.mcpMetricsMinTs(); + const warning = + minTs !== null && minTs > baselineSince + ? 'baseline window starts before first recorded metric — drift detection needs more history before signals can be trusted' + : null; + + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + window: { + baseline_since: baselineSince, + baseline_until: baselineUntil, + recent_since: recentSince, + recent_until: recentUntil, + }, + threshold: report.threshold, + rows: report.rows, + new_tools: report.new_tools, + gone_tools: report.gone_tools, + insufficient_data: report.insufficient_data, + ...(warning !== null ? { warning } : {}), + }), + }, + ], + }; + }, + ), + ); +} diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 8d8ecea..58de97f 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -65,6 +65,7 @@ describe('MCP server', () => { 'recall_session', 'rescue_stranded_run', 'rescue_stranded_scan', + 'savings_drift_report', 'savings_report', 'search', 'semantic_search', @@ -419,6 +420,82 @@ describe('MCP server', () => { }); }); + it('savings_drift_report classifies up-drift from mcp_metrics receipts', async () => { + const DAY_MS = 24 * 60 * 60_000; + const now = Date.now(); + // Recent window (last 3 days): 25 search receipts @ ~200 tpc. + for (let i = 0; i < 25; i += 1) { + store.storage.recordMcpMetric({ + ts: now - i * 1_000, + operation: 'search', + input_bytes: 100, + output_bytes: 200, + input_tokens: 60, + output_tokens: 140, + duration_ms: 5, + ok: true, + }); + } + // Baseline window: starts after recent + 3-day gap. With default + // recent_days=3 and baseline_days=14, baseline is [now - 20d, now - 3d). 
+ // Plant baseline at now - 10d so both windows have data with no overlap. + const baselineTs = now - 10 * DAY_MS; + for (let i = 0; i < 25; i += 1) { + store.storage.recordMcpMetric({ + ts: baselineTs + i * 1_000, + operation: 'search', + input_bytes: 100, + output_bytes: 200, + input_tokens: 30, + output_tokens: 70, + duration_ms: 5, + ok: true, + }); + } + + const res = await client.callTool({ + name: 'savings_drift_report', + arguments: { baseline_days: 14, recent_days: 3, min_calls: 20 }, + }); + const text = (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'; + const payload = JSON.parse(text) as { + window: { + baseline_since: number; + baseline_until: number; + recent_since: number; + recent_until: number; + }; + threshold: { up: number; down: number; min_calls: number }; + rows: Array<{ + operation: string; + baseline_median: number | null; + recent_median: number | null; + baseline_n: number; + recent_n: number; + ratio: number | null; + classification: string; + }>; + new_tools: string[]; + gone_tools: string[]; + insufficient_data: Array<{ operation: string; baseline_n: number; recent_n: number }>; + }; + + expect(payload.window.recent_until).toBeGreaterThan(payload.window.recent_since); + expect(payload.window.baseline_until).toBeLessThan(payload.window.recent_since); + expect(payload.threshold).toEqual({ up: 1.25, down: 0.75, min_calls: 20 }); + + const search = payload.rows.find((row) => row.operation === 'search'); + expect(search).toBeDefined(); + expect(search?.baseline_median).toBe(100); + expect(search?.recent_median).toBe(200); + expect(search?.baseline_n).toBe(25); + expect(search?.recent_n).toBe(25); + expect(search?.classification).toBe('up_drift'); + expect(search?.ratio).toBeCloseTo(2, 6); + expect(payload.new_tools).toEqual([]); + expect(payload.gone_tools).toEqual([]); + }); + it('task_post reports a structured error for stale task ids', async () => { store.startSession({ id: 's-post', ide: 'test', cwd: '/tmp' }); 
diff --git a/docs/mcp.md b/docs/mcp.md index e992307..77cfb00 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -101,6 +101,7 @@ workflow guidance. | Rescue | `rescue_stranded_scan` | Dry-run stranded lane/claim rescue candidates. | | Rescue | `rescue_stranded_run` | Emit rescue relays and release abandoned claims. | | Metrics | `savings_report` | Report live MCP token receipts and reference savings model. | +| Metrics | `savings_drift_report` | Flag tools whose median tokens-per-call drifted vs a baseline window. | ## Ruflo sidecar boundary @@ -2235,6 +2236,36 @@ Response shape: Live tokens are counted with `@colony/compress#countTokens` — the same primitive that produces observation token receipts, so values line up across surfaces. Estimated USD cost is computed at report time from the live token totals and caller-provided USD-per-1M rates; unset rates keep `cost_basis.configured=false` and report zero cost fields. New failure rows record `error_code` / `error_message`; older failure rows may have unknown reason fields. The reference table is static and sourced from `packages/core/src/savings-reference.ts`; the CLI command `colony gain` and the worker's `/savings` page render the same payload. +## `savings_drift_report` + +Flags tools whose median tokens-per-call has drifted up or down over a recent window vs. a baseline window. Pure read path over `mcp_metrics` — no schema change. Mirrors the `colony gain drift` CLI subcommand. + +Args: + +- `baseline_days?` — baseline window length in days. Defaults to 14, max 180. +- `recent_days?` — recent window length in days. Defaults to 3, max 60. +- `min_calls?` — minimum sample size per window before flagging drift. Defaults to 20. +- `threshold?` — up-drift trigger ratio (`recent_median / baseline_median`). Defaults to 1.25 (+25%). +- `down_threshold?` — down-drift trigger ratio. Defaults to 0.75 (-25%). +- `operation?` — filter rows by exact operation name (e.g. `"search"`). 
+ +Windows are non-overlapping with a 3-day gap (`baseline` ends 3 days before `recent` begins) so day-of-week noise does not bleed across. Only `ok=1` receipts are considered — retry storms and rejections never inflate the signal. The median is exact (no interpolation) and computed with `ROW_NUMBER()` window aggregation in SQLite. + +Response shape: + +```json +{ + "window": { "baseline_since": 1729000000000, "baseline_until": 1730000000000, "recent_since": 1730259200000, "recent_until": 1730518400000 }, + "threshold": { "up": 1.25, "down": 0.75, "min_calls": 20 }, + "rows": [{ "operation": "search", "baseline_median": 412, "baseline_n": 2140, "recent_median": 587, "recent_n": 412, "ratio": 1.42, "classification": "up_drift" }], + "new_tools": ["savings_drift_report"], + "gone_tools": [], + "insufficient_data": [{ "operation": "suggest", "baseline_n": 3, "recent_n": 18 }] +} +``` + +Classifications: `up_drift`, `down_drift`, `new_tool` (no baseline data), `gone` (no recent calls), `insufficient_data` (either window below `min_calls`), or `stable`. When the baseline window starts before the earliest `mcp_metrics` receipt the response adds a `warning` field nudging callers to wait for more history before trusting signals. + ## Plan observation kinds The lane introduces several observation kinds on the parent spec task and on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload. 
diff --git a/openspec/changes/gain-drift-detector-2026-05-16/CHANGE.md b/openspec/changes/gain-drift-detector-2026-05-16/CHANGE.md new file mode 100644 index 0000000..ee28a4b --- /dev/null +++ b/openspec/changes/gain-drift-detector-2026-05-16/CHANGE.md @@ -0,0 +1,63 @@ +--- +slug: gain-drift-detector-2026-05-16 +--- + +# CHANGE · gain-drift-detector-2026-05-16 + +## §P proposal +# Long-run tokens-per-call drift detector + +## Problem + +README §v0.x "Receipts and observability" calls out a missing long-run +regression detector for tool token spend. Operators today can spot a single +hot loop with `colony gain`, but they cannot answer "did `search` quietly +get 40% more expensive in the last three days?" without manually charting +the daily aggregates. We need a deterministic signal that fires when a +tool's median tokens-per-call drifts outside a configurable band against a +baseline window. + +## Acceptance criteria + +- `colony gain drift` ships with sensible defaults + (`--baseline-days 14 --recent-days 3 --min-calls 20 --threshold 1.25 + --down-threshold 0.75`) and a `--json` mode whose payload includes window + bounds, classified rows, and `new_tools` / `gone_tools` / `insufficient_data` + arrays. +- `savings_drift_report` MCP tool exposes the same surface, named so it + groups with `savings_report` and does not collide with the existing + `task_drift_check` (file-edit drift). +- No schema change. The signal reads `mcp_metrics` only. +- Windows are non-overlapping with a 3-day gap so day-of-week noise does + not bleed across baseline and recent. +- Only `ok=1` receipts contribute; retry storms cannot inflate the median. +- A baseline-shorter-than-history warning appears in both CLI and MCP + outputs. +- Storage method, classifier, CLI render, MCP envelope, and listed-tools + set are covered by focused tests. 
+ +## Sub-tasks + +### Sub-task 0: Implement and verify + +File scope: packages/storage/src/storage.ts, packages/core/src/drift.ts, +packages/core/src/index.ts, apps/cli/src/commands/gain.ts, +apps/mcp-server/src/tools/savings-drift.ts, apps/mcp-server/src/server.ts, +docs/mcp.md, packages/storage/test/mcp-metrics.test.ts, +apps/cli/test/gain-drift.test.ts, apps/mcp-server/test/server.test.ts. + +Verification: `pnpm --filter @colony/storage test`, +`pnpm --filter colonyq typecheck`, `pnpm --filter colonyq test -- gain`, +`pnpm --filter @colony/mcp-server test`, `pnpm --filter colonyq build`. + +## §S delta +op|target|row +-|-|- + +## §T tasks +id|status|task|cites +-|-|-|- + +## §B bugs +id|status|task|cites +-|-|-|- diff --git a/packages/core/src/drift.ts b/packages/core/src/drift.ts new file mode 100644 index 0000000..5f06d2a --- /dev/null +++ b/packages/core/src/drift.ts @@ -0,0 +1,139 @@ +// Token-per-call drift detector. Pure function: takes the raw rows the +// storage method emits and classifies each operation into one of six +// buckets. No DB access here — keeps the classifier easy to unit-test +// in isolation and lets the CLI and MCP tool share one implementation. + +export type DriftClassification = + | 'up_drift' + | 'down_drift' + | 'new_tool' + | 'gone' + | 'insufficient_data' + | 'stable'; + +export interface DriftRawRow { + operation: string; + baseline_median: number | null; + baseline_n: number; + recent_median: number | null; + recent_n: number; +} + +export interface DriftRow { + operation: string; + baseline_median: number | null; + baseline_n: number; + recent_median: number | null; + recent_n: number; + ratio: number | null; + classification: DriftClassification; +} + +export interface DriftClassifyOptions { + /** Inclusive ratio cut-off for `up_drift` (e.g. 1.25 = +25%). */ + threshold: number; + /** Inclusive ratio cut-off for `down_drift` (e.g. 0.75 = -25%). 
*/ + down_threshold: number; + /** Minimum sample size in each window to trust the median signal. */ + min_calls: number; +} + +export interface DriftReport { + threshold: { + up: number; + down: number; + min_calls: number; + }; + rows: DriftRow[]; + new_tools: string[]; + gone_tools: string[]; + insufficient_data: Array<{ operation: string; baseline_n: number; recent_n: number }>; +} + +/** + * Classify each tool's tokens-per-call drift between a baseline and a + * recent window. The caller is responsible for picking non-overlapping + * windows; this function only consumes the rows. + * + * Buckets (evaluated in order): + * - `new_tool`: no baseline data, recent has >= min_calls samples + * - `gone`: no recent data, baseline has >= min_calls samples + * - `insufficient_data`: either window below min_calls (not new/gone) + * - `up_drift`: ratio >= threshold AND both windows >= min_calls + * - `down_drift`: ratio <= down_threshold AND both windows >= min_calls + * - `stable`: otherwise + * + * The returned `rows` array preserves every row from the input so callers + * can render the full table; the convenience arrays (`new_tools`, + * `gone_tools`, `insufficient_data`) point at the same operations for + * one-line summary lines. 
+ */ +export function classifyDrift( + rawRows: ReadonlyArray<DriftRawRow>, + opts: DriftClassifyOptions, +): DriftReport { + const rows: DriftRow[] = rawRows.map((raw) => { + const ratio = computeRatio(raw.baseline_median, raw.recent_median); + const classification = classifyOne(raw, ratio, opts); + return { + operation: raw.operation, + baseline_median: raw.baseline_median, + baseline_n: raw.baseline_n, + recent_median: raw.recent_median, + recent_n: raw.recent_n, + ratio, + classification, + }; + }); + + const new_tools = rows + .filter((row) => row.classification === 'new_tool') + .map((row) => row.operation); + const gone_tools = rows + .filter((row) => row.classification === 'gone') + .map((row) => row.operation); + const insufficient_data = rows + .filter((row) => row.classification === 'insufficient_data') + .map((row) => ({ + operation: row.operation, + baseline_n: row.baseline_n, + recent_n: row.recent_n, + })); + + return { + threshold: { + up: opts.threshold, + down: opts.down_threshold, + min_calls: opts.min_calls, + }, + rows, + new_tools, + gone_tools, + insufficient_data, + }; +} + +function classifyOne( + raw: DriftRawRow, + ratio: number | null, + opts: DriftClassifyOptions, +): DriftClassification { + const hasBaseline = raw.baseline_n >= opts.min_calls; + const hasRecent = raw.recent_n >= opts.min_calls; + if (raw.baseline_n === 0 && hasRecent) return 'new_tool'; + if (raw.recent_n === 0 && hasBaseline) return 'gone'; + if (!hasBaseline || !hasRecent) return 'insufficient_data'; + if (ratio === null) return 'stable'; + if (ratio >= opts.threshold) return 'up_drift'; + if (ratio <= opts.down_threshold) return 'down_drift'; + return 'stable'; +} + +function computeRatio( + baseline: number | null, + recent: number | null, +): number | null { + if (baseline === null || recent === null) return null; + if (baseline <= 0) return null; + return recent / baseline; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index ebeeecc..402f430 100644
--- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -17,6 +17,14 @@ export { type SavingsReferenceRow, type SavingsReferenceTotals, } from './savings-reference.js'; +export { + classifyDrift, + type DriftClassification, + type DriftClassifyOptions, + type DriftRawRow, + type DriftReport, + type DriftRow, +} from './drift.js'; export { autoReleaseStalePlanSubtaskClaims, bulkRescueStrandedSessions, diff --git a/packages/storage/src/storage.ts b/packages/storage/src/storage.ts index 2c89e3f..f2e39a2 100644 --- a/packages/storage/src/storage.ts +++ b/packages/storage/src/storage.ts @@ -1419,6 +1419,98 @@ export class Storage { })); } + // Earliest ts recorded in mcp_metrics. Used by the drift detector to warn + // when the requested baseline window starts before the first recorded + // receipt — drift signals are noisy when the baseline lacks history. + mcpMetricsMinTs(): number | null { + const row = this.db + .prepare('SELECT MIN(ts) AS min_ts FROM mcp_metrics') + .get() as { min_ts: number | null } | undefined; + return row?.min_ts ?? null; + } + + // Per-operation median tokens-per-call across two non-overlapping windows. + // Reads from the existing mcp_metrics table — no schema change. SQLite has + // no PERCENTILE_CONT, so the median is computed via correlated subquery + // with LIMIT 1 OFFSET (count-1)/2 on ordered rows. Only ok=1 receipts are + // considered so retry storms and rejections do not skew the signal. The + // FULL OUTER JOIN produces a row for every operation present in either + // window, with NULL median+n=0 for the missing side (used to classify + // new_tool / gone in the caller). SQLite 3.39+ supports FULL OUTER JOIN + // (better-sqlite3 ^11 ships 3.49). 
+ mcpTokenDriftPerOperation(opts: { + baseline_since: number; + baseline_until: number; + recent_since: number; + recent_until: number; + }): Array<{ + operation: string; + baseline_median: number | null; + baseline_n: number; + recent_median: number | null; + recent_n: number; + }> { + const rows = this.db + .prepare( + `WITH + baseline AS ( + SELECT operation, + (input_tokens + output_tokens) AS tpc, + ROW_NUMBER() OVER (PARTITION BY operation ORDER BY input_tokens + output_tokens) AS rn, + COUNT(*) OVER (PARTITION BY operation) AS n + FROM mcp_metrics + WHERE ok = 1 AND ts >= ? AND ts < ? + ), + recent AS ( + SELECT operation, + (input_tokens + output_tokens) AS tpc, + ROW_NUMBER() OVER (PARTITION BY operation ORDER BY input_tokens + output_tokens) AS rn, + COUNT(*) OVER (PARTITION BY operation) AS n + FROM mcp_metrics + WHERE ok = 1 AND ts >= ? AND ts <= ? + ), + baseline_med AS ( + SELECT operation, tpc AS median, n + FROM baseline + WHERE rn = (n - 1) / 2 + 1 + ), + recent_med AS ( + SELECT operation, tpc AS median, n + FROM recent + WHERE rn = (n - 1) / 2 + 1 + ) + SELECT COALESCE(b.operation, r.operation) AS operation, + b.median AS baseline_median, + b.n AS baseline_n, + r.median AS recent_median, + r.n AS recent_n + FROM baseline_med b + FULL OUTER JOIN recent_med r ON b.operation = r.operation + ORDER BY operation`, + ) + .all( + opts.baseline_since, + opts.baseline_until, + opts.recent_since, + opts.recent_until, + ) as Array<{ + operation: string | null; + baseline_median: number | null; + baseline_n: number | null; + recent_median: number | null; + recent_n: number | null; + }>; + return rows + .filter((row): row is { operation: string } & typeof row => row.operation !== null) + .map((row) => ({ + operation: row.operation, + baseline_median: row.baseline_median ?? null, + baseline_n: row.baseline_n ?? 0, + recent_median: row.recent_median ?? null, + recent_n: row.recent_n ?? 
0, + })); + } + private mcpMetricSessionCount(where: string, args: ReadonlyArray<unknown>): number { const row = this.db .prepare( diff --git a/packages/storage/test/mcp-metrics.test.ts b/packages/storage/test/mcp-metrics.test.ts index 6f3f038..a18563c 100644 --- a/packages/storage/test/mcp-metrics.test.ts +++ b/packages/storage/test/mcp-metrics.test.ts @@ -292,4 +292,134 @@ describe('mcp_metrics storage', () => { it('aggregateMcpMetricsDaily returns empty array when no rows in window', () => { expect(storage.aggregateMcpMetricsDaily({ since: 0 })).toEqual([]); }); + + describe('mcpTokenDriftPerOperation', () => { + // Helper: build a row at ts with explicit tokens-per-call sum. Each row + // is one mcp_metrics receipt; tpc is the (input + output) total. + function recordTpc( + storage: Storage, + ts: number, + operation: string, + tpc: number, + ok = true, + ): void { + // Split the tpc as 1/3 input, 2/3 output to keep the receipts realistic. + const input = Math.round(tpc / 3); + const output = tpc - input; + record(storage, { ts, operation, input_tokens: input, output_tokens: output, ok }); + } + + it('returns no rows when both windows are empty', () => { + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + expect(rows).toEqual([]); + }); + + it('reports stable median when baseline and recent overlap closely', () => { + // Baseline: 5 calls @ 100 tpc, baseline window [0, 1000). + for (let i = 0; i < 5; i += 1) recordTpc(storage, 100 + i, 'search', 100); + // Recent: 5 calls @ 105 tpc, recent window [2000, 3000].
+ for (let i = 0; i < 5; i += 1) recordTpc(storage, 2_000 + i, 'search', 105); + + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + operation: 'search', + baseline_median: 100, + baseline_n: 5, + recent_median: 105, + recent_n: 5, + }); + }); + + it('captures clear up-drift when recent median is much higher than baseline', () => { + // Baseline: 21 calls @ ~100 tpc; recent: 21 calls @ ~200 tpc. + for (let i = 0; i < 21; i += 1) recordTpc(storage, 100 + i, 'search', 100); + for (let i = 0; i < 21; i += 1) recordTpc(storage, 2_000 + i, 'search', 200); + + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + expect(rows).toHaveLength(1); + expect(rows[0]?.baseline_median).toBe(100); + expect(rows[0]?.recent_median).toBe(200); + expect(rows[0]?.baseline_n).toBe(21); + expect(rows[0]?.recent_n).toBe(21); + }); + + it('captures down-drift when recent median is much lower than baseline', () => { + for (let i = 0; i < 25; i += 1) recordTpc(storage, 100 + i, 'task_post', 600); + for (let i = 0; i < 25; i += 1) recordTpc(storage, 2_000 + i, 'task_post', 300); + + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + const tp = rows.find((r) => r.operation === 'task_post'); + expect(tp?.baseline_median).toBe(600); + expect(tp?.recent_median).toBe(300); + }); + + it('reports new tools with baseline_n=0 and recent_n>0', () => { + // Only recent window has the operation. 
+ for (let i = 0; i < 20; i += 1) recordTpc(storage, 2_000 + i, 'savings_drift_report', 150); + + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + const newOp = rows.find((r) => r.operation === 'savings_drift_report'); + expect(newOp?.baseline_n).toBe(0); + expect(newOp?.baseline_median).toBeNull(); + expect(newOp?.recent_n).toBe(20); + expect(newOp?.recent_median).toBe(150); + }); + + it('honors ok=0 filter so retry storms do not skew the median', () => { + // 10 ok=1 receipts and 10 ok=0 receipts in recent; only ok=1 should + // contribute to recent_median + recent_n. + for (let i = 0; i < 10; i += 1) recordTpc(storage, 100 + i, 'search', 100); + for (let i = 0; i < 10; i += 1) recordTpc(storage, 2_000 + i, 'search', 200, true); + for (let i = 0; i < 10; i += 1) recordTpc(storage, 2_500 + i, 'search', 5_000, false); + + const rows = storage.mcpTokenDriftPerOperation({ + baseline_since: 0, + baseline_until: 1_000, + recent_since: 2_000, + recent_until: 3_000, + }); + const search = rows.find((r) => r.operation === 'search'); + expect(search?.recent_n).toBe(10); + expect(search?.recent_median).toBe(200); + }); + }); + + describe('mcpMetricsMinTs', () => { + it('returns null on empty table', () => { + expect(storage.mcpMetricsMinTs()).toBeNull(); + }); + + it('returns the earliest ts when receipts exist', () => { + record(storage, { ts: 500, operation: 'search' }); + record(storage, { ts: 200, operation: 'search' }); + record(storage, { ts: 900, operation: 'task_post' }); + expect(storage.mcpMetricsMinTs()).toBe(200); + }); + }); });