diff --git a/.changeset/gather-match-rationale.md b/.changeset/gather-match-rationale.md new file mode 100644 index 00000000..ea69675a --- /dev/null +++ b/.changeset/gather-match-rationale.md @@ -0,0 +1,5 @@ +--- +"@anarchitecture/ghost": minor +--- + +`ghost gather` now explains why each candidate matched, so the request-to-node route is auditable instead of a bare ranking. diff --git a/packages/ghost/src/commands/gather-command.ts b/packages/ghost/src/commands/gather-command.ts index 4d846a9b..948150c7 100644 --- a/packages/ghost/src/commands/gather-command.ts +++ b/packages/ghost/src/commands/gather-command.ts @@ -136,12 +136,35 @@ function formatCandidatesMarkdown(query: string, matches: SearchHit[]): string { ); for (const hit of matches) { const kind = hit.surface ? "surface" : "node"; - lines.push(`- \`${hit.id}\` (${kind})`); + lines.push(`- \`${hit.id}\` (${kind}) — ${matchLabel(hit)}`); if (hit.description) lines.push(` - ${hit.description}`); } return `${lines.join("\n")}\n`; } +/** + * Explain *why* a candidate matched, so the route is auditable rather than a + * bare ranking. Mirrors `gather`'s slice provenance: the agent (and the human + * reading a review packet) can see which signal placed each node. The `reason` + * tier names the field for the substring tiers; `coverage` tells the multi-word + * story the tier alone can't. + */ +function matchLabel(hit: SearchHit): string { + switch (hit.reason) { + case "exact": + return "exact id"; + case "fuzzy": + return "likely typo of the name"; + default: { + if (hit.coverage) { + const { covered, total } = hit.coverage; + return `matched ${covered}/${total} words in ${hit.reason}`; + } + return `matched the query in ${hit.reason}`; + } + } +} + function provenanceLabel(provenance: GraphSliceProvenance): string { switch (provenance.kind) { case "own": diff --git a/packages/ghost/src/ghost-core/graph/search.ts b/packages/ghost/src/ghost-core/graph/search.ts index 7d1ce2f5..52f8b515 100644 --- a/packages/ghost/src/ghost-core/graph/search.ts +++ b/packages/ghost/src/ghost-core/graph/search.ts @@ -10,7 +10,11 @@ import { GHOST_GRAPH_ROOT_ID, type GhostGraph } from "./types.js"; * its words a node covers. Selection machinery, not interpretation. */ -/** Why a hit matched, strongest first. Doubles as the ranking tier. */ +/** + * Why a hit matched, strongest first. Doubles as the ranking tier, and — for + * the `name`/`description`/`body` tiers — names the field the signal landed in, + * so a route can explain itself without a parallel field. + */ export type SearchReason = "exact" | "name" | "description" | "body" | "fuzzy"; export interface SearchHit { @@ -21,6 +25,13 @@ export interface SearchHit { /** Higher is more relevant; ties break on id ascending. */ score: number; reason: SearchReason; + /** + * For a multi-word query, how many of its tokens the node covered. Present + * only on a coverage match — a whole-query (verbatim or fuzzy) hit has no + * coverage story. This is the one fact the `reason` tier alone can't tell, + * and it's what makes a multi-word route auditable rather than a bare score. + */ + coverage?: { covered: number; total: number }; } const SCORE: Record = { @@ -74,6 +85,7 @@ export function searchGraph( surface, score: scored.score, reason: scored.reason, + ...(scored.coverage ? { coverage: scored.coverage } : {}), }); } @@ -115,6 +127,7 @@ const STOPWORDS = new Set([ interface ScoredMatch { score: number; reason: SearchReason; + coverage?: { covered: number; total: number }; } /** @@ -163,7 +176,7 @@ function scoreCandidate( if (tokens.length < 2) return undefined; let covered = 0; - let strongest: SearchReason | undefined; + let strongest: "name" | "description" | "body" | undefined; for (const token of tokens) { const field = matchField(token, lowerName, lowerDesc, lowerBody); if (!field) continue; @@ -175,7 +188,11 @@ function scoreCandidate( // Scale the field tier by the fraction of tokens covered so a full-phrase // match outranks a partial one, but keep it below the verbatim tiers. const coverage = covered / tokens.length; - return { score: Math.round(SCORE[strongest] * coverage), reason: strongest }; + return { + score: Math.round(SCORE[strongest] * coverage), + reason: strongest, + coverage: { covered, total: tokens.length }, + }; } /** The strongest field a single token appears in, or undefined. */ @@ -184,7 +201,7 @@ function matchField( lowerName: string, lowerDesc: string | undefined, lowerBody: string, -): SearchReason | undefined { +): "name" | "description" | "body" | undefined { if (lowerName.includes(token)) return "name"; if (lowerDesc?.includes(token)) return "description"; if (lowerBody.includes(token)) return "body"; diff --git a/packages/ghost/test/ghost-core/graph-search.test.ts b/packages/ghost/test/ghost-core/graph-search.test.ts index 057c0758..8a0541d5 100644 --- a/packages/ghost/test/ghost-core/graph-search.test.ts +++ b/packages/ghost/test/ghost-core/graph-search.test.ts @@ -111,6 +111,31 @@ describe("searchGraph", () => { expect(withStop[0]?.score).toBe(withoutStop[0]?.score); }); + it("carries no coverage on a whole-query hit", () => { + const graph = fixture(); + const hit = searchGraph("marketing", graph)[0]; + expect(hit?.reason).toBe("exact"); + expect(hit?.coverage).toBeUndefined(); + }); + + it("attaches token coverage for a multi-word phrase match", () => { + const graph = fixture(); + // 'outbound surfaces' — both words are in "Outbound brand surfaces." but + // not contiguous, so this is coverage, not a verbatim substring. + const hit = searchGraph("outbound surfaces", graph)[0]; + expect(hit?.reason).toBe("description"); + expect(hit?.coverage).toEqual({ covered: 2, total: 2 }); + }); + + it("reports partial coverage when only some words land", () => { + const graph = fixture(); + // 'payment elsewhere' — only 'payment' is in checkout's description. + const hit = searchGraph("payment elsewhere", graph).find( + (h) => h.id === "checkout", + ); + expect(hit?.coverage).toEqual({ covered: 1, total: 2 }); + }); + it("excludes the implicit core root and returns nothing for an empty query", () => { const graph = fixture(); expect(searchGraph("core", graph).every((h) => h.id !== "core")).toBe(true);