Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .changeset/gain-drift-detector.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
'colonyq': minor
'@colony/storage': minor
'@colony/core': minor
'@colony/mcp-server': minor
---

`colony gain drift` and a matching `savings_drift_report` MCP tool flag
tools whose median tokens-per-call has drifted up or down. Default windows
are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days
before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold
0.75`, `--min-calls 20` per window. Classifications: `up_drift`,
`down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`,
`stable`.

Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation
medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)`
window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2`
form because SQLite forbids outer aggregate references in scalar-subquery
`OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the
baseline window starts before the first recorded metric.
234 changes: 233 additions & 1 deletion apps/cli/src/commands/gain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import {
type SavingsLiveComparisonCost,
type SavingsReferenceRow,
type SavingsReferenceTotals,
classifyDrift,
type DriftReport,
savingsLiveComparison,
savingsLiveComparisonCost,
savingsReferenceTotals,
Expand Down Expand Up @@ -40,6 +42,16 @@ interface GainOptions {
topOps?: string;
}

interface GainDriftOptions {
json?: boolean;
baselineDays?: string;
recentDays?: string;
minCalls?: string;
threshold?: string;
downThreshold?: string;
operation?: string;
}

export interface MoverRow {
operation: string;
recent_calls: number;
Expand Down Expand Up @@ -78,7 +90,7 @@ interface TopErrorReason {
}

export function registerGainCommand(program: Command): void {
program
const gain = program
.command('gain')
.description('Show colony token/cost savings from live mcp_metrics receipts')
.option('--json', 'emit structured JSON')
Expand Down Expand Up @@ -250,6 +262,91 @@ export function registerGainCommand(program: Command): void {
movers,
);
});

gain
.command('drift')
.description(
'Flag tools whose median tokens-per-call has drifted vs a baseline window (no schema change)',
)
.option('--baseline-days <n>', 'baseline window length in days (default 14)')
.option('--recent-days <n>', 'recent window length in days (default 3)')
.option('--min-calls <n>', 'minimum sample size per window to trust signal (default 20)')
.option('--threshold <ratio>', 'up-drift trigger ratio (default 1.25 = +25%)')
.option('--down-threshold <ratio>', 'down-drift trigger ratio (default 0.75 = -25%)')
.option('--operation <name>', 'show only this operation row in the table')
.option('--json', 'emit structured JSON')
.action(async (opts: GainDriftOptions) => {
const settings = loadSettings();
const baselineDays = parsePositiveFloat(opts.baselineDays) ?? 14;
const recentDays = parsePositiveFloat(opts.recentDays) ?? 3;
const minCalls = parsePositiveInt(opts.minCalls) ?? 20;
const threshold = parsePositiveFloat(opts.threshold) ?? 1.25;
const downThreshold = parsePositiveFloat(opts.downThreshold) ?? 0.75;

const now = Date.now();
const recentSince = now - recentDays * 24 * 60 * 60_000;
// 3-day gap between recent and baseline so day-of-week noise does not
// bleed across — see spec/v0.x roadmap.
const baselineUntil = recentSince - 3 * 24 * 60 * 60_000;
const baselineSince = baselineUntil - baselineDays * 24 * 60 * 60_000;
const recentUntil = now;

const { rawRows, minTs } = await withStorage(
settings,
(storage) => {
const allRows = storage.mcpTokenDriftPerOperation({
baseline_since: baselineSince,
baseline_until: baselineUntil,
recent_since: recentSince,
recent_until: recentUntil,
});
const filtered =
opts.operation !== undefined
? allRows.filter((row) => row.operation === opts.operation)
: allRows;
return { rawRows: filtered, minTs: storage.mcpMetricsMinTs() };
},
{ readonly: true },
);

const report = classifyDrift(rawRows, {
threshold,
down_threshold: downThreshold,
min_calls: minCalls,
});

const baselineWarning =
minTs !== null && minTs > baselineSince
? `baseline window starts before first recorded metric — drift detection needs ~${
Math.ceil((recentDays + baselineDays + 3) - (now - minTs) / (24 * 60 * 60_000))
} more day${baselineDays > 1 ? 's' : ''} of history`
: null;

if (opts.json === true) {
const payload = {
window: {
baseline_since: baselineSince,
baseline_until: baselineUntil,
recent_since: recentSince,
recent_until: recentUntil,
},
threshold: report.threshold,
rows: report.rows,
new_tools: report.new_tools,
gone_tools: report.gone_tools,
insufficient_data: report.insufficient_data,
...(baselineWarning !== null ? { warning: baselineWarning } : {}),
};
process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
return;
}

writeDriftReport(report, {
recentDays,
baselineDays,
baselineWarning,
});
});
}

export function writeGainReport(
Expand Down Expand Up @@ -1175,6 +1272,12 @@ function parsePositiveInt(raw: string | undefined): number | undefined {
return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : undefined;
}

function parsePositiveFloat(raw: string | undefined): number | undefined {
if (raw === undefined || raw.trim() === '') return undefined;
const parsed = Number(raw);
return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

// rtk-style proportional bar. The row whose value equals `max` gets a full
// bar; smaller rows scale linearly. Empty when max <= 0.
export function renderImpactBar(value: number, max: number, width: number): string {
Expand Down Expand Up @@ -1586,3 +1689,132 @@ function colorByEfficiency(pct: number, text: string): string {
if (pct >= 40) return kleur.yellow().bold(text);
return kleur.red().bold(text);
}

export interface DriftReportInput {
recentDays: number;
baselineDays: number;
baselineWarning: string | null;
}

// Plaintext rendering for `colony gain drift`. Mirrors the gain layout
// (kleur-colored, padded columns). Drift-classified rows print first;
// new/gone/insufficient sets get one-line summaries underneath so a
// quick scan answers "is anything regressing?" without re-running.
export function writeDriftReport(report: DriftReport, input: DriftReportInput): void {
const w = process.stdout;
const { recentDays, baselineDays, baselineWarning } = input;
w.write(
`${kleur.bold(
`colony gain drift (recent ${formatDaysLabel(recentDays)} vs baseline ${formatDaysLabel(
baselineDays,
)})`,
)}\n`,
);
w.write(
kleur.dim(
`Thresholds: up >= ${report.threshold.up.toFixed(2)}x, down <= ${report.threshold.down.toFixed(
2,
)}x, min ${report.threshold.min_calls} calls per window.\n`,
),
);
if (baselineWarning !== null) {
w.write(`${kleur.yellow('[warn] ')}${baselineWarning}\n`);
}
const tableRows = report.rows.filter(
(row) =>
row.classification === 'up_drift' ||
row.classification === 'down_drift' ||
row.classification === 'stable',
);
if (tableRows.length === 0 && report.new_tools.length === 0 && report.gone_tools.length === 0) {
w.write(kleur.dim('No operations had enough samples in both windows.\n'));
if (report.insufficient_data.length > 0) {
writeDriftInsufficient(report);
}
return;
}
if (tableRows.length > 0) {
const widths = [24, 13, 11, 8, 7, 7, 18];
const head = padRow(
['Operation', 'Baseline med', 'Recent med', 'Ratio', 'n_base', 'n_rec', 'Class'],
widths,
);
w.write(`${kleur.dim(head)}\n`);
// Up-drift first (most urgent), then down, then stable. Within each
// bucket keep the storage-emitted alphabetical order so output is
// deterministic for tests.
const ordered = [
...tableRows.filter((row) => row.classification === 'up_drift'),
...tableRows.filter((row) => row.classification === 'down_drift'),
...tableRows.filter((row) => row.classification === 'stable'),
];
for (const row of ordered) {
const cells = [
truncate(row.operation, widths[0] ?? 24),
formatTokens(row.baseline_median ?? 0),
formatTokens(row.recent_median ?? 0),
formatDriftRatio(row.ratio, row.classification),
formatInt(row.baseline_n),
formatInt(row.recent_n),
formatDriftClass(row.classification),
];
w.write(`${padRow(cells, widths)}\n`);
}
}
if (report.insufficient_data.length > 0) {
writeDriftInsufficient(report);
}
if (report.new_tools.length > 0) {
w.write(
`${kleur.dim('New tools (no baseline):')} ${report.new_tools.join(', ')}\n`,
);
}
if (report.gone_tools.length > 0) {
w.write(
`${kleur.dim('Gone tools (no recent calls):')} ${report.gone_tools.join(', ')}\n`,
);
}
}

function writeDriftInsufficient(report: DriftReport): void {
const names = report.insufficient_data
.map((row) => row.operation)
.slice(0, 12)
.join(', ');
const more = report.insufficient_data.length > 12
? `, +${report.insufficient_data.length - 12} more`
: '';
process.stdout.write(
`${kleur.dim(`Insufficient data (n<${report.threshold.min_calls}):`)} ${names}${more}\n`,
);
}

function formatDriftRatio(ratio: number | null, classification: DriftReport['rows'][number]['classification']): string {
if (ratio === null) return '-';
const rounded = ratio >= 10 ? ratio.toFixed(1) : ratio.toFixed(2);
if (classification === 'up_drift') return kleur.red(`▲${rounded}x`);
if (classification === 'down_drift') return kleur.green(`▼${rounded}x`);
return `${rounded}x`;
}

function formatDriftClass(classification: DriftReport['rows'][number]['classification']): string {
switch (classification) {
case 'up_drift':
return kleur.red('up_drift');
case 'down_drift':
return kleur.green('down_drift');
case 'stable':
return kleur.dim('stable');
case 'new_tool':
return kleur.cyan('new_tool');
case 'gone':
return kleur.dim('gone');
case 'insufficient_data':
return kleur.dim('insufficient');
}
}

function formatDaysLabel(days: number): string {
if (Number.isInteger(days)) return `${days}d`;
return `${days.toFixed(1)}d`;
}
Loading
Loading