Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/gather-match-rationale.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@anarchitecture/ghost": minor
---

`ghost gather` now explains why each candidate matched, so the request-to-node route is auditable instead of a bare ranking.
25 changes: 24 additions & 1 deletion packages/ghost/src/commands/gather-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,35 @@ function formatCandidatesMarkdown(query: string, matches: SearchHit[]): string {
);
for (const hit of matches) {
const kind = hit.surface ? "surface" : "node";
lines.push(`- \`${hit.id}\` (${kind})`);
lines.push(`- \`${hit.id}\` (${kind}) — ${matchLabel(hit)}`);
if (hit.description) lines.push(` - ${hit.description}`);
}
return `${lines.join("\n")}\n`;
}

/**
 * Explain *why* a candidate matched, so the route is auditable rather than a
 * bare ranking. Mirrors `gather`'s slice provenance: the agent (and the human
 * reading a review packet) can see which signal placed each node.
 *
 * - `exact` and `fuzzy` are whole-query hits and get a fixed phrase.
 * - `name` / `description` / `body` without `coverage` mean the query matched
 *   in that field.
 * - `name` / `description` / `body` with `coverage` mean a multi-word match:
 *   `coverage` says how many words landed, and `reason` is the strongest
 *   field any of those words landed in.
 *
 * @param hit - The search hit to describe; only `reason` and `coverage` are read.
 * @returns A short human-readable phrase describing the matching signal.
 */
function matchLabel(hit: SearchHit): string {
  switch (hit.reason) {
    case "exact":
      return "exact id";
    case "fuzzy":
      return "likely typo of the name";
    default: {
      if (hit.coverage) {
        const { covered, total } = hit.coverage;
        // `covered` counts words found in any field, while `reason` is only
        // the strongest field among them. Saying "N/M words in <field>" would
        // wrongly imply every covered word was found in that one field.
        return `matched ${covered}/${total} words (strongest in ${hit.reason})`;
      }
      return `matched the query in ${hit.reason}`;
    }
  }
}

function provenanceLabel(provenance: GraphSliceProvenance): string {
switch (provenance.kind) {
case "own":
Expand Down
25 changes: 21 additions & 4 deletions packages/ghost/src/ghost-core/graph/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import { GHOST_GRAPH_ROOT_ID, type GhostGraph } from "./types.js";
* its words a node covers. Selection machinery, not interpretation.
*/

/** Why a hit matched, strongest first. Doubles as the ranking tier. */
/**
* Why a hit matched, strongest first. Doubles as the ranking tier, and — for
* the `name`/`description`/`body` tiers — names the field the signal landed in,
* so a route can explain itself without a parallel field.
*/
export type SearchReason = "exact" | "name" | "description" | "body" | "fuzzy";

export interface SearchHit {
Expand All @@ -21,6 +25,13 @@ export interface SearchHit {
/** Higher is more relevant; ties break on id ascending. */
score: number;
reason: SearchReason;
/**
* For a multi-word query, how many of its tokens the node covered. Present
* only on a coverage match — a whole-query (verbatim or fuzzy) hit has no
* coverage story. This is the one fact the `reason` tier alone can't tell,
* and it's what makes a multi-word route auditable rather than a bare score.
*/
coverage?: { covered: number; total: number };
}

const SCORE: Record<SearchReason, number> = {
Expand Down Expand Up @@ -74,6 +85,7 @@ export function searchGraph(
surface,
score: scored.score,
reason: scored.reason,
...(scored.coverage ? { coverage: scored.coverage } : {}),
});
}

Expand Down Expand Up @@ -115,6 +127,7 @@ const STOPWORDS = new Set([
/**
 * Internal result of scoring one candidate against the query, before it is
 * copied onto a `SearchHit`.
 */
interface ScoredMatch {
/** Relevance score; for a coverage match it is the tier score scaled by the covered fraction and rounded. */
score: number;
/** The tier that produced the match; for a coverage match, the strongest field any token hit. */
reason: SearchReason;
/** Set only for a multi-word coverage match: tokens covered out of the total token count. */
coverage?: { covered: number; total: number };
}

/**
Expand Down Expand Up @@ -163,7 +176,7 @@ function scoreCandidate(
if (tokens.length < 2) return undefined;

let covered = 0;
let strongest: SearchReason | undefined;
let strongest: "name" | "description" | "body" | undefined;
for (const token of tokens) {
const field = matchField(token, lowerName, lowerDesc, lowerBody);
if (!field) continue;
Expand All @@ -175,7 +188,11 @@ function scoreCandidate(
// Scale the field tier by the fraction of tokens covered so a full-phrase
// match outranks a partial one, but keep it below the verbatim tiers.
const coverage = covered / tokens.length;
return { score: Math.round(SCORE[strongest] * coverage), reason: strongest };
return {
score: Math.round(SCORE[strongest] * coverage),
reason: strongest,
coverage: { covered, total: tokens.length },
};
}

/** The strongest field a single token appears in, or undefined. */
Expand All @@ -184,7 +201,7 @@ function matchField(
lowerName: string,
lowerDesc: string | undefined,
lowerBody: string,
): SearchReason | undefined {
): "name" | "description" | "body" | undefined {
if (lowerName.includes(token)) return "name";
if (lowerDesc?.includes(token)) return "description";
if (lowerBody.includes(token)) return "body";
Expand Down
25 changes: 25 additions & 0 deletions packages/ghost/test/ghost-core/graph-search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,31 @@ describe("searchGraph", () => {
expect(withStop[0]?.score).toBe(withoutStop[0]?.score);
});

// A one-word query never reaches the token-coverage path (it requires at
// least two tokens), so a whole-query hit must not carry a `coverage` field.
it("carries no coverage on a whole-query hit", () => {
const graph = fixture();
const hit = searchGraph("marketing", graph)[0];
expect(hit?.reason).toBe("exact");
expect(hit?.coverage).toBeUndefined();
});

it("attaches token coverage for a multi-word phrase match", () => {
const graph = fixture();
// 'outbound surfaces' — both words are in "Outbound brand surfaces." but
// not contiguous, so this is coverage, not a verbatim substring.
const hit = searchGraph("outbound surfaces", graph)[0];
expect(hit?.reason).toBe("description");
expect(hit?.coverage).toEqual({ covered: 2, total: 2 });
});

it("reports partial coverage when only some words land", () => {
const graph = fixture();
// 'payment elsewhere' — only 'payment' is in checkout's description.
const hit = searchGraph("payment elsewhere", graph).find(
(h) => h.id === "checkout",
);
expect(hit?.coverage).toEqual({ covered: 1, total: 2 });
});

it("excludes the implicit core root and returns nothing for an empty query", () => {
const graph = fixture();
expect(searchGraph("core", graph).every((h) => h.id !== "core")).toBe(true);
Expand Down