diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f3aca6..347ee4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,44 @@ All notable changes to this project are documented here. The format is based on
 
 ## [Unreleased]
 
+### Changed — retrieval ranking & fusion (requires a one-time reindex)
+- **RRF fusion rescaled and re-keyed.** Fused scores were ~`w/k` (≈0.017), an order
+  of magnitude below the reranker's bounded bonuses, so rerank silently became the
+  primary ranker. RRF is now scaled by `k` (a pure monotonic rescale — order is
+  unchanged) so fused scores and rerank bonuses share an O(1) scale. Fusion also
+  merges on a coarse `(path, line-bucket)` key instead of an exact `(path, start,
+  end)` one: different retrievers report different ranges for the same place, so the
+  exact key almost never coincided and cross-source agreement never fired.
+  `agreeing_sources` is now counted at file granularity.
+- **Confidence uses a scale-invariant relative gap** instead of absolute thresholds.
+- **Per-file diversification**: at most 3 hits per file stay on the page; the rest
+  are pushed to the tail (nothing is dropped). Combined with bucketing this removes
+  the "same small file returned six times at different line slivers" noise.
+- **FTS recall on natural-language queries**: stopwords (`how`, `does`, `the`, …)
+  are dropped before building the FTS `MATCH`, so a query like "how does auth work"
+  no longer AND-s in filler that code chunks never contain.
+- **Symbol names are FTS-indexed.** `chunks` gained a denormalized `symbol_names`
+  column (mirrored verbatim by the FTS sync triggers, so external-content delete/
+  update stays consistent) — a query matching a symbol's name now hits even when the
+  body text doesn't repeat it. **Bumps `SCHEMA_VERSION` 1 → 2.** Older indexes are
+  still readable; `index`/`update` detect the mismatch and rebuild from scratch.
+- **Centrality fallback for ambiguous names**: symbols whose name isn't globally
+  unique never get a resolved `in_degree`; they now receive a damped, half-capped
+  bonus from a name-reference count so common names (`run`, `handle`, …) aren't
+  flatly zeroed. Precise `in_degree`, where present, still takes precedence.
+- **Test-file demotion is word-boundary aware**: `contest/`, `latest.py`,
+  `testimonials.tsx` are no longer mistaken for test files.
+- **Language-aware import resolution**: `import './base'` from a `.ts` file resolves
+  to `base.ts` rather than a same-named `base.py` earlier in the fallback order.
+- **Freshness is content-aware**: a bare `touch` (mtime change, identical bytes) is
+  a no-op for `update`, so the index is no longer reported as stale — freshness now
+  mirrors the sha-based incremental decision.
+
+### Removed
+- Dead legacy lexical-search path in `retrieval/searchers.py` (`fts_response`,
+  `fts_search`, the second `Candidate` dataclass and `_confidence`/`_fallbacks`/
+  `_trim`) — the live path goes through `pipeline.search` → `fts_candidates`.
+
 ## [1.4.0] - 2026-06-14
 
 ### Added
diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md
index f5c21bd..21f2735 100644
--- a/docs/SCHEMA.md
+++ b/docs/SCHEMA.md
@@ -39,7 +39,8 @@ CREATE TABLE chunks (
     kind          TEXT,                        -- 'symbol_body' | 'window' | 'doc'
     symbol_id     INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
     content       TEXT NOT NULL,               -- raw text (secret-redacted before snippet output)
-    token_est     INTEGER NOT NULL             -- estimated tokens, for budgeting
+    token_est     INTEGER NOT NULL,            -- estimated tokens, for budgeting
+    symbol_names  TEXT NOT NULL DEFAULT ''     -- denormalized symbol name, FTS-indexed (mirrored by triggers)
 );
 CREATE INDEX idx_chunks_file ON chunks(file_id);
 
diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py
index 0e18ccc..692158a 100644
--- a/src/codebase_index/cli.py
+++ b/src/codebase_index/cli.py
@@ -54,6 +54,13 @@ def _ensure_index(ctx: "typer.Context") -> tuple[Path, Any]:
     return db_path, cfg
 
 
+def _remove_db_files(db_path: Path) -> None:
+    """Delete the SQLite db and its WAL/SHM sidecars (used to force a clean rebuild)."""
+    # missing_ok: no exists()/unlink() race if a sidecar disappears in between.
+    for suffix in ("", "-wal", "-shm"):
+        db_path.with_name(db_path.name + suffix).unlink(missing_ok=True)
+
+
 def _open_in_browser(path: Path) -> None:
     uri = path.resolve().as_uri()
     try:
@@ -278,13 +285,15 @@ def index(
 
     from .config import load
     from .indexer.pipeline import build_index
-    from .storage.db import Database
+    from .storage.db import SCHEMA_VERSION, Database, peek_schema_version
 
     root_opt = ctx.obj.get("root") if ctx.obj else None
     cfg = load(root_opt)
     db_path = Path(cfg.root) / ".claude" / "cache" / "codebase-index" / "index.sqlite"
-    if rebuild and db_path.exists():
-        db_path.unlink()
+    # A full build discards an outdated-schema index: schema.sql is applied with
+    # IF NOT EXISTS, so an upgrade can't add columns/triggers in place — recreate.
+    if rebuild or (db_path.exists() and peek_schema_version(db_path) < SCHEMA_VERSION):
+        _remove_db_files(db_path)
 
     with Database(db_path) as db:
         stats = build_index(cfg, db, root=Path(cfg.root))
@@ -321,8 +330,8 @@ def update(
     import json as _json
 
     from .config import load
-    from .indexer.pipeline import update_index
-    from .storage.db import Database
+    from .indexer.pipeline import build_index, update_index
+    from .storage.db import SCHEMA_VERSION, Database, peek_schema_version
 
     is_json = bool(ctx.obj and ctx.obj.get("json"))
     quiet = bool(ctx.obj and ctx.obj.get("quiet"))
@@ -336,8 +345,15 @@ def update(
             typer.echo("No index found. Run `codebase-index index` first.")
         raise typer.Exit(code=0)
 
-    with Database(db_path) as db:
-        stats = update_index(cfg, db, root=Path(cfg.root), since=since, all_files=all_files)
+    if peek_schema_version(db_path) < SCHEMA_VERSION:
+        # Schema changed under the index; an incremental write would target old
+        # tables. Upgrade by rebuilding from scratch (the index is a derived cache).
+        _remove_db_files(db_path)
+        with Database(db_path) as db:
+            stats = build_index(cfg, db, root=Path(cfg.root))
+    else:
+        with Database(db_path) as db:
+            stats = update_index(cfg, db, root=Path(cfg.root), since=since, all_files=all_files)
 
     if is_json:
         typer.echo(
diff --git a/src/codebase_index/discovery/classify.py b/src/codebase_index/discovery/classify.py
index f41be35..d9983ef 100644
--- a/src/codebase_index/discovery/classify.py
+++ b/src/codebase_index/discovery/classify.py
@@ -132,3 +132,20 @@ def is_generated(path: str) -> bool:
         or name.endswith(".min.js")
         or name.endswith(".min.css")
     )
+
+
+# Directory names that mark a test tree, and filename patterns for test modules.
+# Matched on whole path segments / filename stems — NOT a bare substring — so
+# `contest/`, `latest.py`, or `testimonials.ts` are never mistaken for tests.
+_TEST_DIRS = {"test", "tests", "__tests__", "__test__", "testing", "spec", "specs", "e2e"}
+
+
+def is_test_path(path: str) -> bool:
+    """True when a directory segment or the filename of `path` marks a test file."""
+    posix = PurePosixPath(path.replace("\\", "/"))
+    if not _TEST_DIRS.isdisjoint(seg.lower() for seg in posix.parts[:-1]):
+        return True
+    filename = posix.name.lower()
+    stem = filename.partition(".")[0]
+    by_stem = stem == "test" or stem.startswith("test_") or stem.endswith("_test")
+    return by_stem or ".test." in filename or ".spec." in filename
diff --git a/src/codebase_index/graph/builder.py b/src/codebase_index/graph/builder.py
index 99f6ede..f2e342b 100644
--- a/src/codebase_index/graph/builder.py
+++ b/src/codebase_index/graph/builder.py
@@ -42,7 +42,7 @@ def resolve_edges(conn: sqlite3.Connection) -> int:
     for edge in edges:
         name = edge["dst_name"]
         if edge["edge_type"] == "import":
-            file_id = _module_to_file_id(suffix_map, name)
+            file_id = _module_to_file_id(suffix_map, name, lang=edge["lang"])
             if file_id is not None:
                 resolutions.append(("file", file_id, edge["id"]))
         elif edge["edge_type"] in _SYMBOL_EDGE_TYPES:
@@ -70,13 +70,36 @@ def _path_suffix_map(rows: list[sqlite3.Row]) -> dict[str, Optional[int]]:
     return mapping
 
 
+def _lang_suffixes(lang: Optional[str], base: str, rust_base: str, go_pkg: str) -> list[str]:
+    """Suffixes to try for an import from `lang`, most-specific first; [] if unlisted."""
+    return {
+        "python": [f"{base}.py", f"{base}/__init__.py"],
+        "typescript": [f"{base}.ts", f"{base}.tsx", f"{base}/index.ts", f"{base}/index.tsx"],
+        "javascript": [f"{base}.js", f"{base}/index.js"],
+        "java": [f"{base}.java"],
+        "kotlin": [f"{base}.kt"],
+        "go": [f"{go_pkg}.go"],
+        "rust": [
+            f"{rust_base}.rs", f"{rust_base}/mod.rs",
+            f"src/{rust_base}.rs", f"src/{rust_base}/mod.rs",
+        ],
+        "csharp": [f"{base}.cs"],
+        "ruby": [f"{base}.rb"],
+        "php": [f"{base}.php"],
+    }.get(lang or "", [])
+
+
 def _module_to_file_id(
-    suffix_map: dict[str, Optional[int]], module: str
+    suffix_map: dict[str, Optional[int]], module: str, lang: Optional[str] = None
 ) -> Optional[int]:
     """Resolve a module/import path to a unique file id, or None.
 
     Handles Python, TypeScript/JavaScript, Java/Kotlin/Scala, Rust (:: separator),
-    Go (last path segment), C#, Ruby, and PHP import conventions.
+    Go (last path segment), C#, Ruby, and PHP import conventions. The importing
+    file's `lang` is tried first so that, in a polyglot repo, `import './base'` from
+    a .ts file resolves to base.ts rather than a same-named base.py earlier in the
+    fixed fallback order. The fallback order is unchanged, so single-language repos
+    and the lang-unknown path behave exactly as before.
     """
     base = module.lower().replace(".", "/").strip("/")
     rust_base = module.lower().replace("::", "/").strip("/")
@@ -85,7 +108,7 @@ def _module_to_file_id(
     # Last segment used for Go package-level resolution
     go_pkg = base.rsplit("/", 1)[-1] if "/" in base else base
 
-    for suffix in (
+    fallback = (
         # Python
         f"{base}.py",
         f"{base}/__init__.py",
@@ -113,7 +136,8 @@ def _module_to_file_id(
         f"{base}.rb",
         # PHP
         f"{base}.php",
-    ):
+    )
+    for suffix in (*_lang_suffixes(lang, base, rust_base, go_pkg), *fallback):
         file_id = suffix_map.get(suffix)
         if file_id is not None:
             return file_id
diff --git a/src/codebase_index/indexer/freshness.py b/src/codebase_index/indexer/freshness.py
index b70f6ea..fbdb678 100644
--- a/src/codebase_index/indexer/freshness.py
+++ b/src/codebase_index/indexer/freshness.py
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import hashlib
 import subprocess
 from pathlib import Path
 
@@ -47,25 +48,47 @@ def compute_freshness(conn, root: Path, config: Config) -> IndexFreshness:
 
 
 def _changed_count(conn, root: Path, config: Config) -> int:
-    """Added + removed + mtime-modified indexable files vs. the index."""
-    current: dict[str, int] = {}
+    """Added + removed + content-modified indexable files vs. the index.
+
+    Mirrors the incremental update's decision (indexer/pipeline.py): a file is
+    unchanged when (mtime, size) match, and even when they differ it is only
+    counted as changed if its sha256 differs. A bare `touch` that rewrites mtime
+    without changing bytes is a no-op for update_index, so it must not register as
+    stale here either.
+    """
+    indexed = repo.fingerprints(conn)  # path -> (mtime_ns, size_bytes, sha256)
+    seen: set[str] = set()
+    changed = 0
     for cand in walk(root, config):
         try:
-            current[cand.rel_path] = cand.path.stat().st_mtime_ns
+            st = cand.path.stat()
         except OSError:
             continue
-    indexed = repo.path_mtimes(conn)
-
-    changed = 0
-    for path, mtime in current.items():
-        if path not in indexed or indexed[path] != mtime:
-            changed += 1
-    for path in indexed:
-        if path not in current:
+        seen.add(cand.rel_path)
+        prior = indexed.get(cand.rel_path)
+        if prior is None:
             changed += 1
+            continue
+        if prior[0] == st.st_mtime_ns and prior[1] == cand.size_bytes:
+            continue
+        try:
+            if prior[2] == _sha256_file(cand.path):
+                continue
+        except OSError:
+            pass  # cannot hash the file: fall through and count it as changed
+        changed += 1
+    changed += sum(1 for path in indexed if path not in seen)  # indexed, not seen in this walk
     return changed
 
 
+def _sha256_file(path: Path) -> str:
+    digest = hashlib.sha256()  # fed in 64 KiB reads; the file is never fully in memory
+    with path.open("rb") as stream:
+        while chunk := stream.read(65536):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
 def _git_clean_at(root: Path, indexed_head: "str | None") -> bool:
     """True iff git is available, HEAD == indexed_head, and the tree has no changes."""
     if indexed_head is None or not (root / ".git").exists():
diff --git a/src/codebase_index/retrieval/fusion.py b/src/codebase_index/retrieval/fusion.py
index ea92610..a410545 100644
--- a/src/codebase_index/retrieval/fusion.py
+++ b/src/codebase_index/retrieval/fusion.py
@@ -1,8 +1,21 @@
 """Reciprocal Rank Fusion across per-source ranked candidate lists.
 
-RRF(d) = Σ_r  w_r / (k + rank_r(d))   — robust to incomparable raw scores.
-On merge, the candidate carrying the most signal (symbol > fts > path) is kept
-as the representative so downstream rerank/snippet logic has the richest fields.
+RRF(d) = Σ_r  w_r · k / (k + rank_r(d))   — robust to incomparable raw scores.
+
+Two deliberate departures from the textbook formula:
+
+* Scaled by k. Raw RRF tops out at w/k (≈0.017 for k=60), an order of magnitude
+  below the bounded bonuses the reranker layers on top, so rerank would silently
+  become the primary ranker and RRF a mere tiebreak. Multiplying by k is a pure
+  monotonic rescale (fusion order is identical) that lifts the top contribution to
+  ≈w, putting fused scores and rerank bonuses on the same O(1) scale.
+* Fused on a coarse (path, line-bucket) key, not (path, start, end). Different
+  retrievers report different line ranges for the same place; an exact key almost
+  never coincides across sources, so cross-source agreement — RRF's whole point —
+  would never fire. `agreeing_sources` is therefore counted at file granularity.
+
+On merge, the candidate carrying the most signal (symbol > fts > path) is kept as
+the representative so downstream rerank/snippet logic has the richest fields.
 """
 
 from __future__ import annotations
@@ -26,18 +39,24 @@ def fuse(
 ) -> list[Candidate]:
     accum: dict[tuple, float] = {}
     rep: dict[tuple, Candidate] = {}
-    agree: dict[tuple, set[str]] = {}
+    seen: set[tuple] = set()
+    file_sources: dict[str, set[str]] = {}
 
     for source, candidates in lists.items():
         w = weights.get(source, 0.0)
         if w <= 0.0:
             continue
         for rank, cand in enumerate(candidates):
-            key = cand.key()
-            accum[key] = accum.get(key, 0.0) + w / (k + rank)
-            agree.setdefault(key, set()).add(source)
+            file_sources.setdefault(cand.path, set()).add(source)
+            key = cand.fuse_key()
+            # One contribution per source per locator: a file matching three FTS
+            # chunks in the same bucket is one lexical signal, not three.
+            if (source, key) in seen:
+                continue
+            seen.add((source, key))
+            accum[key] = accum.get(key, 0.0) + w * k / (k + rank)
             rep[key] = _richer(rep[key], cand) if key in rep else cand
 
     fused = [_replace(rep[key], score=score) for key, score in accum.items()]
     fused.sort(key=lambda c: c.score, reverse=True)
-    return [_replace(c, agreeing_sources=len(agree[c.key()])) for c in fused]
+    return [_replace(c, agreeing_sources=len(file_sources[c.path])) for c in fused]
diff --git a/src/codebase_index/retrieval/pipeline.py b/src/codebase_index/retrieval/pipeline.py
index 0264bc8..8a82096 100644
--- a/src/codebase_index/retrieval/pipeline.py
+++ b/src/codebase_index/retrieval/pipeline.py
@@ -22,6 +22,10 @@
 
 _TERM_RE = re.compile(r"[A-Za-z0-9_]+")
 _RRF_K = 60
+# Max results kept per file before extras are pushed to the tail. Bucketed fusion
+# already collapses co-located hits; this caps the long tail of one big file
+# dominating the page so distinct files get surfaced.
+_MAX_PER_FILE = 3
 _KIND_ALIASES = {
     "method": "method",
     "methods": "method",
@@ -67,9 +71,14 @@ def _confidence(ranked) -> Confidence:
     if not ranked:
         return Confidence.LOW
     top = ranked[0]
+    if top.score <= 0:
+        return Confidence.LOW
     if len(ranked) == 1:
-        return Confidence.MEDIUM if top.score > 0 else Confidence.LOW
-    gap = top.score - ranked[1].score
+        return Confidence.MEDIUM
+    # Relative gap, not absolute: scale-invariant, so it stays meaningful regardless
+    # of fusion's score magnitude. agreeing_sources is file-level (how many retrievers
+    # surfaced the winning file at all), the signal RRF agreement is meant to capture.
+    rel_gap = (top.score - ranked[1].score) / top.score
     agree = getattr(top, "agreeing_sources", 1)
     exact = getattr(top, "exact_symbol", False)
     n = len(ranked)
@@ -77,18 +86,31 @@ def _confidence(ranked) -> Confidence:
     if exact:
         return Confidence.HIGH
     # Strong multi-source agreement with a clear score gap
-    if agree >= 3 and gap > 0.005:
+    if agree >= 3 and rel_gap > 0.15:
         return Confidence.HIGH
-    if agree >= 2 and gap > 0.01:
+    if agree >= 2 and rel_gap > 0.25:
         return Confidence.HIGH
     # Single source but very dominant winner
-    if agree == 1 and gap > 0.05 and top.score > 0.1:
+    if agree == 1 and rel_gap > 0.5:
         return Confidence.HIGH
-    if top.score > 0 and (agree >= 2 or gap > 0.005 or n >= 5):
+    if agree >= 2 or rel_gap > 0.1 or n >= 5:
         return Confidence.MEDIUM
     return Confidence.LOW
 
 
+def _diversify(ranked: list, *, per_file: int) -> list:
+    """Stable partition: the first `per_file` hits of each path keep their rank;
+    later hits from the same path move to the tail in their original relative
+    order. Every input item is returned exactly once (nothing is dropped)."""
+    head: list = []
+    tail: list = []
+    seen_per_path: dict[str, int] = {}
+    for hit in ranked:
+        n_seen = seen_per_path[hit.path] = seen_per_path.get(hit.path, 0) + 1
+        (head if n_seen <= per_file else tail).append(hit)
+    return head + tail
+
+
 def _fallback_suggestions(query, ranked) -> dict:
     terms = _TERM_RE.findall(query)
     if not terms:
@@ -121,7 +143,8 @@ def search(
         conn, query, mode=mode, limit=fetch_limit, weights=plan.weights, backend=backend
     )
     fused = fuse(lists, weights=weights, k=_RRF_K)
-    ranked = rerank(fused, query=query, intent=plan.intent)[:fetch_limit]
+    ranked = _diversify(rerank(fused, query=query, intent=plan.intent), per_file=_MAX_PER_FILE)
+    ranked = ranked[:fetch_limit]
     confidence = _confidence(ranked)
     # Scale budget proportionally so later pages receive snippet coverage.
     scaled_budget = token_budget * fetch_limit // max(limit, 1) if offset > 0 else token_budget
diff --git a/src/codebase_index/retrieval/rerank.py b/src/codebase_index/retrieval/rerank.py
index 551c485..32a6ac0 100644
--- a/src/codebase_index/retrieval/rerank.py
+++ b/src/codebase_index/retrieval/rerank.py
@@ -10,6 +10,7 @@
 import math
 import re
 
+from ..discovery.classify import is_test_path
 from .types import Candidate, Intent
 
 _TERM_RE = re.compile(r"[A-Za-z0-9_]+")
@@ -45,11 +46,19 @@ def rerank(candidates: list[Candidate], *, query: str, intent: Intent) -> list[C
         if c.in_degree:
             bonus += min(_DEGREE_CAP, math.log1p(c.in_degree) * _DEGREE_SCALE)
             reasons.append(f"{c.in_degree} callers")
+        elif c.ref_count:
+            # Precise in_degree is only computed for globally-unique symbol names
+            # (ambiguous names never resolve), so common names like `run`/`handle`
+            # always score 0. Fall back to a damped name-reference count — half the
+            # scale and cap — so centrality still breaks ties without overriding the
+            # precise signal where it exists.
+            bonus += min(_DEGREE_CAP / 2, math.log1p(c.ref_count) * (_DEGREE_SCALE / 2))
+            reasons.append(f"~{c.ref_count} refs by name")
         if intent is Intent.ARCHITECTURE and (c.in_degree + c.out_degree):
             bonus += min(_DEGREE_CAP, math.log1p(c.in_degree + c.out_degree) * (_DEGREE_SCALE / 2))
 
         wants_tests = "test" in terms or "tests" in terms
-        if c.is_generated or (("test" in c.path.lower()) and not wants_tests):
+        if c.is_generated or (is_test_path(c.path) and not wants_tests):
             bonus -= 0.15
             reasons.append("generated/test demoted")
 
diff --git a/src/codebase_index/retrieval/searchers.py b/src/codebase_index/retrieval/searchers.py
index 0cb0d00..138bf80 100644
--- a/src/codebase_index/retrieval/searchers.py
+++ b/src/codebase_index/retrieval/searchers.py
@@ -8,31 +8,24 @@
 
 import re
 import sqlite3
-from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional
 
 from ..config import Config
 from ..indexer.freshness import compute_freshness
 from ..models import (
-    Confidence,
     GraphCoverage,
     IndexFreshness,
-    ReadRange,
     RefSite,
     RefsResponse,
-    Result,
-    SearchResponse,
     SymbolDef,
     SymbolResponse,
 )
-from ..output.redact import redact_snippet
 from ..storage import repo
 from .types import Candidate as M4Candidate
 
 _WORD_RE = re.compile(r"[A-Za-z0-9_]+")
 _CAMEL_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z0-9]+")
-_SNIPPET_MAX_LINES = 18
 
 
 def fts_candidates(conn: sqlite3.Connection, query: str, *, limit: int) -> list[M4Candidate]:
@@ -142,6 +135,15 @@ def symbol_candidates(
                 exact_symbol=exact,
             )
         )
+
+    # Damped centrality fallback: symbols whose name is not globally unique never
+    # get a resolved in_degree, so back-fill a name-reference count for the zero ones.
+    zero_deg = [c.symbol for c in out if not c.in_degree and c.symbol]
+    if zero_deg:
+        counts = repo.name_ref_counts(conn, zero_deg)
+        for c in out:
+            if not c.in_degree and c.symbol:
+                c.ref_count = counts.get(c.symbol, 0)
     return out
 
 
@@ -161,18 +163,6 @@ def path_candidates(conn: sqlite3.Connection, query: str, *, limit: int) -> list
     return out
 
 
-@dataclass
-class Candidate:
-    chunk_id: int
-    path: str
-    line_start: int
-    line_end: int
-    content: str
-    token_est: int
-    bm25: float
-    kind: str = "window"
-
-
 def _subtokens(term: str) -> list[str]:
     parts: list[str] = []
     for piece in term.split("_"):
@@ -181,115 +171,30 @@ def _subtokens(term: str) -> list[str]:
 
 
 def build_match_query(query: str) -> str:
+    """Build the FTS5 MATCH expression for `query`.
+
+    Each whitespace term expands to an OR group over the term and its
+    camelCase/snake_case subtokens; groups are AND-ed. Natural-language filler
+    ("how does X work") is dropped first: otherwise FTS would AND-in stopwords
+    that code chunks never contain, collapsing recall to zero on the very intents
+    (HOW_IT_WORKS / DEBUG_ERROR) that weight FTS highest. If *every* term is a
+    stopword we fall back to the full set rather than emit an empty match.
+    """
     groups: list[str] = []
+    salient: list[str] = []
     for term in _WORD_RE.findall(query):
         variants = {term, *_subtokens(term)}
         variants = {v for v in variants if len(v) >= 2}
         if not variants:
             continue
         ored = " OR ".join(f'"{v}"' for v in sorted(variants, key=str.lower))
-        groups.append(f"({ored})" if len(variants) > 1 else ored)
-    # FTS5 rejects implicit AND (space) when a group contains parenthesised OR
-    # expressions; explicit AND is required between all groups.
-    return " AND ".join(groups)
-
-
-def fts_search(conn: sqlite3.Connection, query: str, *, limit: int) -> list[Candidate]:
-    match = build_match_query(query)
-    rows = repo.fts_search(conn, match, limit=limit)
-    return [
-        Candidate(
-            chunk_id=r["chunk_id"],
-            path=r["path"],
-            line_start=r["line_start"],
-            line_end=r["line_end"],
-            content=r["content"],
-            token_est=r["token_est"],
-            bm25=r["bm25"],
-            kind=r.get("kind", "window"),  # type: ignore[attr-defined]
-        )
-        for r in rows
-    ]
-
-
-def fts_response(
-    conn: sqlite3.Connection,
-    query: str,
-    *,
-    limit: int,
-    token_budget: int,
-    root: Path,
-    config: Optional[Config] = None,
-) -> SearchResponse:
-    candidates = fts_search(conn, query, limit=limit)
-    results: list[Result] = []
-    recommended: list[ReadRange] = []
-    spent = 0
-
-    for rank, candidate in enumerate(candidates, start=1):
-        recommended.append(
-            ReadRange(
-                path=candidate.path,
-                line_start=candidate.line_start,
-                line_end=candidate.line_end,
-            )
-        )
-        snippet: Optional[str] = None
-        if spent + candidate.token_est <= token_budget:
-            snippet = redact_snippet(_trim(candidate.content))
-            spent += candidate.token_est
-        results.append(
-            Result(
-                rank=rank,
-                path=candidate.path,
-                line_start=candidate.line_start,
-                line_end=candidate.line_end,
-                symbols=[],
-                score=round(1.0 / rank, 4),
-                reason="doc match" if candidate.kind == "doc" else "lexical match (bm25)",
-                snippet=snippet,
-            )
-        )
-
-    confidence = _confidence(candidates)
-    return SearchResponse(
-        query=query,
-        intent="keyword",
-        index=_freshness(conn, root, config),
-        confidence=confidence,
-        results=results,
-        recommended_reads=recommended,
-        fallback_suggestions=_fallbacks(query) if confidence != "high" else {},
-    )
-
-
-def _trim(content: str) -> str:
-    lines = content.splitlines()
-    if len(lines) <= _SNIPPET_MAX_LINES:
-        return content
-    return "\n".join(lines[:_SNIPPET_MAX_LINES]) + "\n..."
-
-
-def _confidence(candidates: list[Candidate]) -> Confidence:
-    if not candidates:
-        return "low"
-    if len(candidates) == 1:
-        return "medium"
-    top, second = candidates[0], candidates[1]
-    gap = abs(second.bm25 - top.bm25)
-    n = len(candidates)
-    # Clear BM25 separation, or moderate gap with several corroborating results
-    if gap >= 2.0 or (gap >= 1.0 and n >= 3):
-        return "high"
-    if gap >= 0.3 or n >= 3:
-        return "medium"
-    return "low"
-
-
-def _fallbacks(query: str) -> dict[str, list[str]]:
-    terms = _WORD_RE.findall(query)
-    primary = terms[0] if terms else query
-    return {"ripgrep": [f'rg -n "{primary}"', f'rg -ni "{primary}"']}
+        # FTS5 rejects implicit AND (space) when a group contains parenthesised OR
+        # expressions; explicit AND is required between all groups.
+        group = f"({ored})" if len(variants) > 1 else ored
+        groups.append(group)
+        if term.lower() not in _SYMBOL_STOPWORDS:
+            salient.append(group)
+    return " AND ".join(salient or groups)
 
 
 def _freshness(
diff --git a/src/codebase_index/retrieval/types.py b/src/codebase_index/retrieval/types.py
index 6f184a9..3fcf71d 100644
--- a/src/codebase_index/retrieval/types.py
+++ b/src/codebase_index/retrieval/types.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Optional
 
+# Line window used by Candidate.fuse_key to group co-located hits across retrievers.
+# Wide enough to merge a symbol body and the FTS window that overlaps it, narrow
+# enough to keep distinct regions of a large file separate.
+_FUSE_BUCKET_LINES = 40
+
 
 class Intent(str, Enum):
     LOCATE_IMPL = "locate_impl"
@@ -39,6 +44,7 @@ class Candidate:
     token_est: int = 0
     in_degree: int = 0
     out_degree: int = 0
+    ref_count: int = 0
     is_generated: bool = False
     exact_symbol: bool = False
     reason: str = ""
@@ -47,6 +53,19 @@ class Candidate:
     def key(self) -> tuple[str, int, int]:
         return (self.path, self.line_start, self.line_end)
 
+    def fuse_key(self) -> tuple[str, int]:
+        """Coarse locator for RRF fusion: path + line bucket.
+
+        Different retrievers emit different granularities for the same place — a
+        symbol body, an 80-line FTS window, a path hit anchored at line 1 — so an
+        exact (path, start, end) key almost never coincides across sources and RRF
+        degenerates into a weighted round-robin that never rewards agreement.
+        Bucketing line_start merges co-located hits so their per-source RRF
+        contributions sum, yet keeps distant regions of a large file apart.
+        line_start values below 1 are clamped into the first bucket.
+        """
+        return (self.path, (max(self.line_start, 1) - 1) // _FUSE_BUCKET_LINES)
+
 
 @dataclass
 class IntentPlan:
diff --git a/src/codebase_index/storage/db.py b/src/codebase_index/storage/db.py
index f6b2f15..60abbcd 100644
--- a/src/codebase_index/storage/db.py
+++ b/src/codebase_index/storage/db.py
@@ -7,7 +7,8 @@
 from pathlib import Path
 from typing import Optional
 
-SCHEMA_VERSION = 1
+# 2: chunks gained a denormalized `symbol_names` column (FTS symbol-name boost).
+SCHEMA_VERSION = 2
 
 
 class Database:
@@ -73,6 +74,11 @@ def _guard_version(self) -> None:
                 f"Index schema_version {current} is newer than supported {SCHEMA_VERSION}; "
                 "rebuild the index with an updated CLI."
             )
+        # current < SCHEMA_VERSION is tolerated on open: queries never read the
+        # added columns, so an older index is still safely *readable*. The build
+        # commands (index/update) detect the mismatch via peek_schema_version and
+        # rebuild from scratch, since there is no in-place migration framework and
+        # schema.sql is applied with IF NOT EXISTS (old tables/triggers persist).
 
     def enable_vectors(self) -> None:
         """Load the sqlite-vec extension into this connection (optional extra)."""
@@ -85,3 +91,25 @@ def enable_vectors(self) -> None:
         self.conn.enable_load_extension(True)
         sqlite_vec.load(self.conn)
         self.conn.enable_load_extension(False)
+
+
+def peek_schema_version(path: Path | str) -> int:
+    """Read meta.schema_version without applying schema or running the guard.
+
+    Returns 0 when the file, the meta table, or the key is absent, unreadable, or
+    not an integer, so callers can treat any result below SCHEMA_VERSION as "rebuild".
+    """
+    p = Path(path)
+    if not p.exists():
+        return 0
+    try:
+        conn = sqlite3.connect(p)
+        try:
+            row = conn.execute(
+                "SELECT value FROM meta WHERE key = 'schema_version'"
+            ).fetchone()
+            return int(row[0]) if row else 0
+        finally:
+            conn.close()
+    except (sqlite3.Error, TypeError, ValueError, OSError):  # TypeError: NULL value
+        return 0
diff --git a/src/codebase_index/storage/repo.py b/src/codebase_index/storage/repo.py
index debd4e2..77a54cd 100644
--- a/src/codebase_index/storage/repo.py
+++ b/src/codebase_index/storage/repo.py
@@ -102,12 +102,15 @@ def _symbol_id(chunk: Chunk) -> Optional[int]:
             return symbol_ids[chunk.symbol_index]
         return None
 
+    # symbol_names is denormalized into the chunk (see schema.sql): resolve the
+    # name from the just-inserted symbol row (replace_symbols runs first). Stored so
+    # the FTS triggers can replay it verbatim on delete/update.
     conn.executemany(
         """
         INSERT INTO chunks
-            (file_id, line_start, line_end, kind, symbol_id, content, token_est)
+            (file_id, line_start, line_end, kind, symbol_id, content, token_est, symbol_names)
         VALUES
-            (?, ?, ?, ?, ?, ?, ?)
+            (?, ?, ?, ?, ?, ?, ?, COALESCE((SELECT name FROM symbols WHERE id = ?), ''))
         """,
         [
             (
@@ -118,6 +121,7 @@ def _symbol_id(chunk: Chunk) -> Optional[int]:
                 _symbol_id(c),
                 c.content,
                 c.token_est,
+                _symbol_id(c),
             )
             for c in chunks
         ],
@@ -369,8 +373,10 @@ def symbol_search(
 
 def unresolved_edges(conn: sqlite3.Connection) -> list[sqlite3.Row]:
     return conn.execute(
-        "SELECT id, edge_type, dst_name FROM edges "
-        "WHERE resolved = 0 AND dst_name IS NOT NULL ORDER BY id"
+        "SELECT e.id AS id, e.edge_type AS edge_type, e.dst_name AS dst_name, "
+        "       f.lang AS lang "
+        "FROM edges e JOIN files f ON f.id = e.file_id "
+        "WHERE e.resolved = 0 AND e.dst_name IS NOT NULL ORDER BY e.id"
     ).fetchall()
 
 
@@ -391,6 +397,25 @@ def resolve_edges_bulk(
     )
 
 
+def name_ref_counts(conn: sqlite3.Connection, names: Sequence[str]) -> dict[str, int]:
+    """Count edges targeting each name (any resolution state), keyed by dst_name.
+
+    A damped centrality proxy for symbols whose precise in_degree is 0 because their
+    name is not globally unique (ambiguous edges never resolve). Over-counts across
+    same-named symbols by design — it is only used as a weak tiebreak fallback.
+    """
+    uniq = [n for n in dict.fromkeys(names) if n]  # de-dupe (order kept), drop empties
+    if not uniq:
+        return {}  # avoid emitting an invalid empty "IN ()" clause
+    placeholders = ",".join("?" * len(uniq))  # one bound parameter per name
+    rows = conn.execute(
+        "SELECT dst_name, COUNT(*) AS c FROM edges "
+        f"WHERE dst_name IN ({placeholders}) GROUP BY dst_name",
+        tuple(uniq),
+    ).fetchall()
+    return {row["dst_name"]: int(row["c"]) for row in rows}  # names with no edges are absent
+
+
 def unique_symbol_ids_by_name(conn: sqlite3.Connection) -> dict[str, int]:
     """Map symbol name -> id for names defined exactly once in the repo."""
     return {
diff --git a/src/codebase_index/storage/schema.sql b/src/codebase_index/storage/schema.sql
index e0d08ed..10bde93 100644
--- a/src/codebase_index/storage/schema.sql
+++ b/src/codebase_index/storage/schema.sql
@@ -44,7 +44,12 @@ CREATE TABLE IF NOT EXISTS chunks (
     kind          TEXT,
     symbol_id     INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
     content       TEXT NOT NULL,
-    token_est     INTEGER NOT NULL
+    token_est     INTEGER NOT NULL,
+    -- Denormalized copy of the chunk's symbol name, populated at write time.
+    -- Stored (not a live join) so the FTS triggers below can replay the exact
+    -- indexed value on delete/update; a subquery would read a symbol row that the
+    -- ON DELETE SET NULL cascade may already have detached, corrupting the index.
+    symbol_names  TEXT NOT NULL DEFAULT ''
 );
 CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
 
@@ -92,19 +97,23 @@ CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
     tokenize = "unicode61 remove_diacritics 2"
 );
 
+-- symbol_names mirrors new/old.symbol_names (the stored chunk column), NOT a live
+-- join: external-content FTS requires the delete to replay the exact value that was
+-- indexed, which a join could no longer reproduce after a symbol cascade. path is
+-- UNINDEXED so its delete value is irrelevant.
 CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
     INSERT INTO fts_chunks(rowid, content, symbol_names, path)
-    VALUES (new.id, new.content, '', (SELECT path FROM files WHERE id = new.file_id));
+    VALUES (new.id, new.content, new.symbol_names, (SELECT path FROM files WHERE id = new.file_id));
 END;
 CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
     INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
-    VALUES ('delete', old.id, old.content, '', '');
+    VALUES ('delete', old.id, old.content, old.symbol_names, '');
 END;
 CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
     INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
-    VALUES ('delete', old.id, old.content, '', '');
+    VALUES ('delete', old.id, old.content, old.symbol_names, '');
     INSERT INTO fts_chunks(rowid, content, symbol_names, path)
-    VALUES (new.id, new.content, '', (SELECT path FROM files WHERE id = new.file_id));
+    VALUES (new.id, new.content, new.symbol_names, (SELECT path FROM files WHERE id = new.file_id));
 END;
 
 -- vec_chunks (sqlite-vec) is created at runtime ONLY when embeddings.enabled = true.
diff --git a/tests/golden/mcp_search_code.json b/tests/golden/mcp_search_code.json
index 7f81862..d213911 100644
--- a/tests/golden/mcp_search_code.json
+++ b/tests/golden/mcp_search_code.json
@@ -16,36 +16,6 @@
       "line_end": 6,
       "line_start": 4,
       "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 5,
-      "line_start": 4,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 11,
-      "line_start": 9,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 2,
-      "line_start": 1,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 10,
-      "line_start": 9,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 1,
-      "line_start": 1,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 5,
-      "line_start": 1,
-      "path": "src/api/service.py"
     }
   ],
   "results": [
@@ -55,89 +25,23 @@
       "path": "src/auth/token.py",
       "rank": 1,
       "reason": "in src/auth/ · 2 callers",
-      "score": 0.1596,
+      "score": 2.233,
       "snippet": "def refresh_access_token(refresh_token: str) -> str:",
       "symbols": [
         "refresh_access_token"
       ],
       "token_est": 13
     },
-    {
-      "line_end": 5,
-      "line_start": 4,
-      "path": "src/auth/token.py",
-      "rank": 2,
-      "reason": "in src/auth/",
-      "score": 0.0664,
-      "snippet": "Exchange a refresh token for a new access token.",
-      "symbols": [],
-      "token_est": 12
-    },
-    {
-      "line_end": 11,
-      "line_start": 9,
-      "path": "src/auth/token.py",
-      "rank": 3,
-      "reason": "in src/auth/",
-      "score": 0.0661,
-      "snippet": "def login(refresh_token: str) -> str:\n    \"\"\"Calls refresh_access_token so refs/impact tests have an edge.\"\"\"\n    return refresh_access_token(refresh_token)",
-      "symbols": [],
-      "token_est": 39
-    },
-    {
-      "line_end": 2,
-      "line_start": 1,
-      "path": "src/auth/token.py",
-      "rank": 4,
-      "reason": "in src/auth/",
-      "score": 0.0659,
-      "snippet": "\"\"\"Token helpers (fixture).\"\"\"\n",
-      "symbols": [],
-      "token_est": 8
-    },
-    {
-      "line_end": 10,
-      "line_start": 9,
-      "path": "src/auth/token.py",
-      "rank": 5,
-      "reason": "in src/auth/",
-      "score": 0.0652,
-      "snippet": "Calls refresh_access_token so refs/impact tests have an edge.",
-      "symbols": [],
-      "token_est": 15
-    },
-    {
-      "line_end": 1,
-      "line_start": 1,
-      "path": "src/auth/token.py",
-      "rank": 6,
-      "reason": "in src/auth/",
-      "score": 0.0583,
-      "snippet": null,
-      "symbols": [],
-      "token_est": 0
-    },
     {
       "line_end": 11,
       "line_start": 7,
       "path": "src/api/service.py",
-      "rank": 7,
+      "rank": 2,
       "reason": "fts",
-      "score": 0.0156,
+      "score": 0.9375,
       "snippet": "class AdminUser(User):\n    \"\"\"Subclass of User; imported-from edge target for impact tests.\"\"\"\n\n    def renew(self, refresh_token: str) -> str:\n        return refresh_access_token(refresh_token)",
       "symbols": [],
       "token_est": 48
-    },
-    {
-      "line_end": 5,
-      "line_start": 1,
-      "path": "src/api/service.py",
-      "rank": 8,
-      "reason": "fts",
-      "score": 0.0154,
-      "snippet": "\"\"\"Service layer (fixture) - exercises cross-file edges for impact tests.\"\"\"\n\nfrom auth.token import refresh_access_token\nfrom models.user import User\n",
-      "symbols": [],
-      "token_est": 38
     }
   ],
   "schema_version": 1,
diff --git a/tests/golden/search_token.json b/tests/golden/search_token.json
index 732f6c4..a04d5fe 100644
--- a/tests/golden/search_token.json
+++ b/tests/golden/search_token.json
@@ -16,36 +16,6 @@
       "line_end": 6,
       "line_start": 4,
       "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 5,
-      "line_start": 4,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 11,
-      "line_start": 9,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 2,
-      "line_start": 1,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 10,
-      "line_start": 9,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 1,
-      "line_start": 1,
-      "path": "src/auth/token.py"
-    },
-    {
-      "line_end": 5,
-      "line_start": 1,
-      "path": "src/api/service.py"
     }
   ],
   "results": [
@@ -55,89 +25,23 @@
       "path": "src/auth/token.py",
       "rank": 1,
       "reason": "in src/auth/ · 2 callers",
-      "score": 0.1596,
+      "score": 2.233,
       "snippet": "def refresh_access_token(refresh_token: str) -> str:",
       "symbols": [
         "refresh_access_token"
       ],
       "token_est": 13
     },
-    {
-      "line_end": 5,
-      "line_start": 4,
-      "path": "src/auth/token.py",
-      "rank": 2,
-      "reason": "in src/auth/",
-      "score": 0.0664,
-      "snippet": "Exchange a refresh token for a new access token.",
-      "symbols": [],
-      "token_est": 12
-    },
-    {
-      "line_end": 11,
-      "line_start": 9,
-      "path": "src/auth/token.py",
-      "rank": 3,
-      "reason": "in src/auth/",
-      "score": 0.0661,
-      "snippet": "def login(refresh_token: str) -> str:\n    \"\"\"Calls refresh_access_token so refs/impact tests have an edge.\"\"\"\n    return refresh_access_token(refresh_token)",
-      "symbols": [],
-      "token_est": 39
-    },
-    {
-      "line_end": 2,
-      "line_start": 1,
-      "path": "src/auth/token.py",
-      "rank": 4,
-      "reason": "in src/auth/",
-      "score": 0.0659,
-      "snippet": "\"\"\"Token helpers (fixture).\"\"\"\n",
-      "symbols": [],
-      "token_est": 8
-    },
-    {
-      "line_end": 10,
-      "line_start": 9,
-      "path": "src/auth/token.py",
-      "rank": 5,
-      "reason": "in src/auth/",
-      "score": 0.0652,
-      "snippet": "Calls refresh_access_token so refs/impact tests have an edge.",
-      "symbols": [],
-      "token_est": 15
-    },
-    {
-      "line_end": 1,
-      "line_start": 1,
-      "path": "src/auth/token.py",
-      "rank": 6,
-      "reason": "in src/auth/",
-      "score": 0.0583,
-      "snippet": null,
-      "symbols": [],
-      "token_est": 0
-    },
     {
       "line_end": 11,
       "line_start": 7,
       "path": "src/api/service.py",
-      "rank": 7,
+      "rank": 2,
       "reason": "fts",
-      "score": 0.0156,
+      "score": 0.9375,
       "snippet": "class AdminUser(User):\n    \"\"\"Subclass of User; imported-from edge target for impact tests.\"\"\"\n\n    def renew(self, refresh_token: str) -> str:\n        return refresh_access_token(refresh_token)",
       "symbols": [],
       "token_est": 48
-    },
-    {
-      "line_end": 5,
-      "line_start": 1,
-      "path": "src/api/service.py",
-      "rank": 8,
-      "reason": "fts",
-      "score": 0.0154,
-      "snippet": "\"\"\"Service layer (fixture) - exercises cross-file edges for impact tests.\"\"\"\n\nfrom auth.token import refresh_access_token\nfrom models.user import User\n",
-      "symbols": [],
-      "token_est": 38
     }
   ]
 }
diff --git a/tests/test_classify.py b/tests/test_classify.py
index 87aa43c..547e518 100644
--- a/tests/test_classify.py
+++ b/tests/test_classify.py
@@ -4,6 +4,7 @@
     detect_language,
     is_generated,
     is_secret_filename,
+    is_test_path,
     looks_binary,
     parser_for,
 )
@@ -65,4 +66,28 @@ def test_binary_detection():
 def test_generated_detection():
     assert is_generated("src/schema.generated.ts")
     assert is_generated("web/app.min.js")
-    assert not is_generated("web/app.ts")
\ No newline at end of file
+    assert not is_generated("web/app.ts")
+
+
+def test_is_test_path_matches_test_trees_and_modules():
+    for path in [
+        "tests/test_auth.py",
+        "src/__tests__/user.test.ts",
+        "pkg/foo_test.go",
+        "app/user.spec.ts",
+        "e2e/login.py",
+        "project/test/Thing.java",
+    ]:
+        assert is_test_path(path), path
+
+
+def test_is_test_path_does_not_match_substring_lookalikes():
+    # Word-boundary, not bare substring: these contain "test" but are not tests.
+    for path in [
+        "src/contest/leaderboard.py",
+        "lib/latest.py",
+        "util/fastest_path.ts",
+        "web/testimonials.tsx",
+        "src/attestation.py",
+    ]:
+        assert not is_test_path(path), path
\ No newline at end of file
diff --git a/tests/test_freshness.py b/tests/test_freshness.py
index 7a7ee89..af83cac 100644
--- a/tests/test_freshness.py
+++ b/tests/test_freshness.py
@@ -30,18 +30,40 @@ def test_freshly_built_index_is_not_stale(sample_repo, tmp_path):
     db.close()
 
 
-def test_edited_file_makes_index_stale(sample_repo, tmp_path, monkeypatch):
-    """An indexed file whose mtime advanced past the build is counted as changed."""
+def test_edited_file_content_makes_index_stale(sample_repo, tmp_path):
+    """A file whose indexed content (sha256) no longer matches disk is stale."""
     cfg, db = _indexed(sample_repo, tmp_path)
 
     from codebase_index.storage import repo
-    indexed = repo.path_mtimes(db.conn)
+    indexed = repo.fingerprints(db.conn)
     a_path = next(iter(indexed))
-    repo.set_meta(db.conn, "head_commit", "deadbeef")
-    db.conn.execute("UPDATE files SET mtime_ns = 1 WHERE path = ?", (a_path,))
+    repo.set_meta(db.conn, "head_commit", "deadbeef")  # force the accurate (non-git) path
+    # Corrupt the stored fingerprint so it no longer matches the on-disk content's
+    # hash; mtime is reset too so the (mtime,size) fast-equal check can't short-circuit.
+    db.conn.execute(
+        "UPDATE files SET mtime_ns = 1, sha256 = 'stale-sha' WHERE path = ?", (a_path,)
+    )
     db.conn.commit()
 
     fr = compute_freshness(db.conn, sample_repo, cfg)
     assert fr.stale is True
     assert fr.files_changed_since_build >= 1
     db.close()
+
+
+def test_touch_without_content_change_is_not_stale(sample_repo, tmp_path):
+    """A bare mtime bump with unchanged bytes is a no-op for update_index, so
+    freshness must not flag it as stale (it mirrors the sha-based decision)."""
+    cfg, db = _indexed(sample_repo, tmp_path)
+
+    from codebase_index.storage import repo
+    indexed = repo.fingerprints(db.conn)
+    a_path = next(iter(indexed))
+    repo.set_meta(db.conn, "head_commit", "deadbeef")  # force the accurate (non-git) path
+    db.conn.execute("UPDATE files SET mtime_ns = 1 WHERE path = ?", (a_path,))
+    db.conn.commit()
+
+    fr = compute_freshness(db.conn, sample_repo, cfg)
+    assert fr.stale is False
+    assert fr.files_changed_since_build == 0
+    db.close()
diff --git a/tests/test_fusion.py b/tests/test_fusion.py
index c7e30aa..693068f 100644
--- a/tests/test_fusion.py
+++ b/tests/test_fusion.py
@@ -34,3 +34,33 @@ def test_vector_source_participates_in_fusion():
     vec = [Candidate(path="v.py", line_start=1, line_end=2, source="vector", score=0.9)]
     fused = fuse({"vector": vec}, weights={"vector": 1.0}, k=60)
     assert fused and fused[0].path == "v.py"
+
+
+def test_fuse_merges_co_located_hits_across_line_ranges():
+    """Different retrievers report different line ranges for the same place; fusion
+    buckets line_start so co-located cross-source hits still reinforce each other."""
+    fts = [Candidate(path="a.py", line_start=10, line_end=80, source="fts", score=0.9)]
+    sym = [Candidate(path="a.py", line_start=12, line_end=20, source="symbol", score=0.8)]
+    fused = fuse({"fts": fts, "symbol": sym}, weights={"fts": 1.0, "symbol": 1.0}, k=60)
+    assert len(fused) == 1                       # merged despite differing ranges
+    assert fused[0].agreeing_sources == 2        # file-level agreement counted
+
+
+def test_fuse_scores_are_order_one():
+    """RRF is rescaled by k so the top contribution is ~weight (≈1), not ~w/k (≈0.017),
+    keeping fused scores on the same scale as the reranker's bounded bonuses."""
+    fts = [Candidate(path="a.py", line_start=1, line_end=2, source="fts", score=0.9)]
+    fused = fuse({"fts": fts}, weights={"fts": 1.0}, k=60)
+    assert 0.9 < fused[0].score <= 1.0
+
+
+def test_fuse_dedupes_repeated_source_hits_in_one_bucket():
+    """Three FTS chunks of the same file/bucket are one lexical signal, not three."""
+    fts = [
+        Candidate(path="a.py", line_start=1, line_end=10, source="fts", score=0.9),
+        Candidate(path="a.py", line_start=11, line_end=20, source="fts", score=0.8),
+        Candidate(path="a.py", line_start=21, line_end=30, source="fts", score=0.7),
+    ]
+    fused = fuse({"fts": fts}, weights={"fts": 1.0}, k=60)
+    assert len(fused) == 1
+    assert fused[0].score <= 1.0  # single best-rank contribution, not summed 3x
diff --git a/tests/test_rerank.py b/tests/test_rerank.py
index 08fe1c5..208557f 100644
--- a/tests/test_rerank.py
+++ b/tests/test_rerank.py
@@ -40,6 +40,31 @@ def test_in_degree_bonus_is_sublinear_and_capped():
     assert scores[2] < 2 * scores[1]              # 100 callers nowhere near 10x of 10
 
 
+def test_ref_count_is_damped_fallback_when_in_degree_zero():
+    """A symbol with no resolved in_degree (ambiguous name) still gets a small
+    centrality nudge from its name-reference count — but capped below the precise
+    in_degree bonus so it can never override real callers."""
+    no_signal = _c("a.py", "symbol", 0.0)
+    by_name = _c("b.py", "symbol", 0.0, ref_count=50)
+    rerank([no_signal, by_name], query="zzz", intent=Intent.KEYWORD)
+    assert by_name.score > no_signal.score
+    assert by_name.score <= _DEGREE_CAP / 2 + 1e-9          # damped: half the cap
+
+    # Precise in_degree, when present, takes precedence over the name-based proxy.
+    precise = _c("c.py", "symbol", 0.0, in_degree=3)
+    proxy = _c("d.py", "symbol", 0.0, ref_count=3)
+    rerank([precise, proxy], query="zzz", intent=Intent.KEYWORD)
+    assert precise.score > proxy.score
+
+
+def test_contest_path_is_not_demoted_as_test():
+    """The test demotion is word-boundary aware: 'contest' is not a test path."""
+    contest = _c("src/contest/board.py", "fts", 0.5)
+    real_test = _c("tests/test_board.py", "fts", 0.5)
+    rerank([contest, real_test], query="board", intent=Intent.KEYWORD)
+    assert contest.score > real_test.score
+
+
 def test_god_class_does_not_outrank_relevant_match_on_stray_term():
     """A high-in_degree 'god class' that matches only a stray term must not float
     above a genuinely relevant (name/path) match with a slightly lower base score.
diff --git a/tests/test_search_cli.py b/tests/test_search_cli.py
index d0896a4..905891a 100644
--- a/tests/test_search_cli.py
+++ b/tests/test_search_cli.py
@@ -115,7 +115,9 @@ def test_search_reports_stale_after_edit(sample_repo, tmp_path, monkeypatch):
 
     db_path = sample_repo / ".claude" / "cache" / "codebase-index" / "index.sqlite"
     conn = sqlite3.connect(str(db_path))
-    conn.execute("UPDATE files SET mtime_ns = 1")
+    # Freshness is content-aware (sha), not mtime-only: corrupt the stored hash so
+    # the hash recomputed from disk no longer matches the index → genuinely stale.
+    conn.execute("UPDATE files SET mtime_ns = 1, sha256 = 'stale-sha'")
     conn.execute("DELETE FROM meta WHERE key = 'head_commit'")
     conn.commit()
     conn.close()
@@ -147,7 +149,9 @@ def test_explain_reports_stale_after_edit(sample_repo):
 
     db_path = sample_repo / ".claude" / "cache" / "codebase-index" / "index.sqlite"
     conn = sqlite3.connect(str(db_path))
-    conn.execute("UPDATE files SET mtime_ns = 1")
+    # Freshness is content-aware (sha), not mtime-only: corrupt the stored hash so
+    # the hash recomputed from disk no longer matches the index → genuinely stale.
+    conn.execute("UPDATE files SET mtime_ns = 1, sha256 = 'stale-sha'")
     conn.execute("DELETE FROM meta WHERE key = 'head_commit'")
     conn.commit()
     conn.close()
diff --git a/tests/test_searchers.py b/tests/test_searchers.py
index 05772f3..285337d 100644
--- a/tests/test_searchers.py
+++ b/tests/test_searchers.py
@@ -1,8 +1,22 @@
 from codebase_index.retrieval.searchers import (
-    fts_candidates, path_candidates, symbol_candidates,
+    build_match_query, fts_candidates, path_candidates, symbol_candidates,
 )
 
 
+def test_build_match_query_drops_stopwords():
+    # Natural-language filler must not be AND-ed into the match (it kills recall).
+    q = build_match_query("how does authentication work")
+    assert "how" not in q.lower() and "does" not in q.lower()
+    assert "authentication" in q.lower() and "work" in q.lower()
+    assert " AND " in q  # salient terms are still AND-ed together
+
+
+def test_build_match_query_falls_back_when_all_stopwords():
+    # If every term is a stopword we must still emit a (non-empty) match, not "".
+    q = build_match_query("how does it")
+    assert q.strip() != ""
+
+
 def test_fts_candidates_uniform_shape(seeded_index):
     cands = fts_candidates(seeded_index.conn, "token", limit=10)
     assert cands and all(c.source == "fts" for c in cands)
diff --git a/tests/test_storage.py b/tests/test_storage.py
index 92d1a8c..c43effa 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -269,6 +269,99 @@ def test_replace_chunks_with_symbol_ids(tmp_path):
     db.close()
 
 
+def test_chunk_symbol_names_populated_and_searchable(tmp_path):
+    """The chunk's symbol name is denormalized into symbol_names and indexed by FTS,
+    so a query matching only the symbol name (not the body text) still hits."""
+    db = _open(tmp_path)
+    fid = repo.upsert_file(
+        db.conn, path="m.py", lang="python", size_bytes=1, sha256="h", mtime_ns=1,
+        git_status=None, parser="treesitter", indexed_at="t", is_generated=False,
+    )
+    sids = repo.replace_symbols(
+        db.conn, fid,
+        [Symbol(name="refresh_access_token", kind="function",
+                line_start=1, line_end=2, qualified="refresh_access_token")],
+    )
+    # Body text deliberately omits the symbol name, isolating the symbol_names column.
+    repo.replace_chunks(
+        db.conn, fid,
+        [Chunk(line_start=1, line_end=2, content="def f():\n    return 1",
+               token_est=3, kind="symbol_body", symbol_index=0)],
+        symbol_ids=sids,
+    )
+    assert repo.chunks_for_file(db.conn, fid)[0]["symbol_names"] == "refresh_access_token"
+    hit = repo.fts_search(db.conn, "refresh_access_token", limit=10)
+    assert len(hit) == 1 and hit[0]["path"] == "m.py"
+    db.close()
+
+
+def test_chunk_symbol_names_delete_keeps_fts_consistent(tmp_path):
+    """External-content FTS corrupts if a delete replays the wrong indexed value.
+    Replacing chunks (and cascading symbol deletes) must leave a consistent index."""
+    db = _open(tmp_path)
+    fid = repo.upsert_file(
+        db.conn, path="m.py", lang="python", size_bytes=1, sha256="h", mtime_ns=1,
+        git_status=None, parser="treesitter", indexed_at="t", is_generated=False,
+    )
+    sids = repo.replace_symbols(
+        db.conn, fid,
+        [Symbol(name="alpha_symbol", kind="function", line_start=1, line_end=2,
+                qualified="alpha_symbol")],
+    )
+    repo.replace_chunks(
+        db.conn, fid,
+        [Chunk(line_start=1, line_end=2, content="body one", token_est=2,
+               kind="symbol_body", symbol_index=0)],
+        symbol_ids=sids,
+    )
+    assert repo.fts_search(db.conn, "alpha_symbol", limit=10)
+
+    # Re-index the file: deletes the old symbol (cascades chunk.symbol_id -> NULL)
+    # and old chunk, inserts a fresh one with a different symbol name.
+    sids2 = repo.replace_symbols(
+        db.conn, fid,
+        [Symbol(name="beta_symbol", kind="function", line_start=1, line_end=2,
+                qualified="beta_symbol")],
+    )
+    repo.replace_chunks(
+        db.conn, fid,
+        [Chunk(line_start=1, line_end=2, content="body two", token_est=2,
+               kind="symbol_body", symbol_index=0)],
+        symbol_ids=sids2,
+    )
+    assert repo.fts_search(db.conn, "alpha_symbol", limit=10) == []   # old name gone
+    assert repo.fts_search(db.conn, "beta_symbol", limit=10)          # new name present
+    assert db.conn.execute("PRAGMA integrity_check").fetchone()[0] == "ok"
+    db.close()
+
+
+def test_name_ref_counts(tmp_path):
+    db = _open(tmp_path)
+    fid = repo.upsert_file(
+        db.conn, path="m.py", lang="python", size_bytes=1, sha256="h", mtime_ns=1,
+        git_status=None, parser="treesitter", indexed_at="t", is_generated=False,
+    )
+    sids = repo.replace_symbols(
+        db.conn, fid,
+        [Symbol(name="caller", kind="function", line_start=1, line_end=2, qualified="caller")],
+    )
+    repo.replace_edges(
+        db.conn, fid,
+        [
+            {"edge_type": "call", "src_kind": "symbol", "src_id": sids[0],
+             "dst_kind": None, "dst_id": None, "dst_name": "run", "line": 1, "resolved": 0},
+            {"edge_type": "call", "src_kind": "symbol", "src_id": sids[0],
+             "dst_kind": None, "dst_id": None, "dst_name": "run", "line": 2, "resolved": 0},
+            {"edge_type": "call", "src_kind": "symbol", "src_id": sids[0],
+             "dst_kind": None, "dst_id": None, "dst_name": "once", "line": 3, "resolved": 0},
+        ],
+    )
+    counts = repo.name_ref_counts(db.conn, ["run", "once", "absent"])
+    assert counts == {"run": 2, "once": 1}
+    assert repo.name_ref_counts(db.conn, []) == {}
+    db.close()
+
+
 def test_replace_edges_and_refs_for_name(tmp_path):
     db = _open(tmp_path)
     fid = repo.upsert_file(