block · nahiyankhan · Jun 29, 2026
@@ -0,0 +1,5 @@
+---
+"@anarchitecture/ghost": minor
+---
+
+`ghost gather` now labels each candidate with why it matched — exact id, likely typo, or the matched field and word coverage — so the request-to-node route is auditable rather than a bare ranking.
@@ -136,12 +136,35 @@ function formatCandidatesMarkdown(query: string, matches: SearchHit[]): string {
   );
   for (const hit of matches) {
     const kind = hit.surface ? "surface" : "node";
-    lines.push(`- \`${hit.id}\` (${kind})`);
+    lines.push(`- \`${hit.id}\` (${kind}) — ${matchLabel(hit)}`);
     if (hit.description) lines.push(`  - ${hit.description}`);
   }
   return `${lines.join("\n")}\n`;
 }
 
+/**
+ * Explain *why* a candidate matched, so the route is auditable rather than a
+ * bare ranking. `exact` and `fuzzy` get fixed phrases; any other `reason` is
+ * printed as the matched field, with covered/total query words when `coverage`
+ * is set. NOTE(review): `reason` appears to be the strongest field any single
+ * word hit, so "N/M words in name" may overstate the field — confirm.
+ */
+function matchLabel(hit: SearchHit): string {
+  switch (hit.reason) {
+    case "exact":
+      return "exact id";
+    case "fuzzy":
+      return "likely typo of the name";
+    default: {
+      if (hit.coverage) {
+        const { covered, total } = hit.coverage;
+        return `matched ${covered}/${total} words in ${hit.reason}`;
+      }
+      return `matched the query in ${hit.reason}`;
+    }
+  }
+}
+
 function provenanceLabel(provenance: GraphSliceProvenance): string {
   switch (provenance.kind) {
     case "own":

@@ -10,7 +10,11 @@ import { GHOST_GRAPH_ROOT_ID, type GhostGraph } from "./types.js";
  * its words a node covers. Selection machinery, not interpretation.
  */
 
-/** Why a hit matched, strongest first. Doubles as the ranking tier. */
+/**
+ * Why a hit matched, strongest first. Doubles as the ranking tier, and — for
+ * the `name`/`description`/`body` tiers — names the field the signal landed in,
+ * so a route can explain itself without a parallel field.
+ */
 export type SearchReason = "exact" | "name" | "description" | "body" | "fuzzy";
 
 export interface SearchHit {
@@ -21,6 +25,13 @@ export interface SearchHit {
   /** Higher is more relevant; ties break on id ascending. */
   score: number;
   reason: SearchReason;
+  /**
+   * For a multi-word query, how many of its tokens the node covered. Present
+   * only on a coverage match — a whole-query (verbatim or fuzzy) hit has no
+   * coverage story. This is the one fact the `reason` tier alone can't tell,
+   * and it's what makes a multi-word route auditable rather than a bare score.
+   */
+  coverage?: { covered: number; total: number };
 }
 
 const SCORE: Record<SearchReason, number> = {
@@ -74,6 +85,7 @@ export function searchGraph(
       surface,
       score: scored.score,
       reason: scored.reason,
+      ...(scored.coverage ? { coverage: scored.coverage } : {}),
     });
   }
 
@@ -115,6 +127,7 @@ const STOPWORDS = new Set([
 interface ScoredMatch {
   score: number;
   reason: SearchReason;
+  coverage?: { covered: number; total: number };
 }
 
 /**
@@ -163,7 +176,7 @@ function scoreCandidate(
   if (tokens.length < 2) return undefined;
 
   let covered = 0;
-  let strongest: SearchReason | undefined;
+  let strongest: "name" | "description" | "body" | undefined;
   for (const token of tokens) {
     const field = matchField(token, lowerName, lowerDesc, lowerBody);
     if (!field) continue;
@@ -175,7 +188,11 @@ function scoreCandidate(
   // Scale the field tier by the fraction of tokens covered so a full-phrase
   // match outranks a partial one, but keep it below the verbatim tiers.
   const coverage = covered / tokens.length;
-  return { score: Math.round(SCORE[strongest] * coverage), reason: strongest };
+  return {
+    score: Math.round(SCORE[strongest] * coverage),
+    reason: strongest,
+    coverage: { covered, total: tokens.length },
+  };
 }
 
 /** The strongest field a single token appears in, or undefined. */
@@ -184,7 +201,7 @@ function matchField(
   lowerName: string,
   lowerDesc: string | undefined,
   lowerBody: string,
-): SearchReason | undefined {
+): "name" | "description" | "body" | undefined {
   if (lowerName.includes(token)) return "name";
   if (lowerDesc?.includes(token)) return "description";
   if (lowerBody.includes(token)) return "body";

@@ -111,6 +111,31 @@ describe("searchGraph", () => {
     expect(withStop[0]?.score).toBe(withoutStop[0]?.score);
   });
 
+  it("carries no coverage on a whole-query hit", () => {
+    const graph = fixture();
+    const hit = searchGraph("marketing", graph)[0];
+    expect(hit?.reason).toBe("exact");
+    expect(hit?.coverage).toBeUndefined();
+  });
+
+  it("attaches token coverage for a multi-word phrase match", () => {
+    const graph = fixture();
+    // 'outbound surfaces' — both words are in "Outbound brand surfaces." but
+    // not contiguous, so this is coverage, not a verbatim substring.
+    const hit = searchGraph("outbound surfaces", graph)[0];
+    expect(hit?.reason).toBe("description");
+    expect(hit?.coverage).toEqual({ covered: 2, total: 2 });
+  });
+
+  it("reports partial coverage when only some words land", () => {
+    const graph = fixture();
+    // 'payment elsewhere' — only 'payment' is in checkout's description.
+    const hit = searchGraph("payment elsewhere", graph).find(
+      (h) => h.id === "checkout",
+    );
+    expect(hit?.coverage).toEqual({ covered: 1, total: 2 });
+  });
+
   it("excludes the implicit core root and returns nothing for an empty query", () => {
     const graph = fixture();
     expect(searchGraph("core", graph).every((h) => h.id !== "core")).toBe(true);