From 968f63bdc159d4e7513af40c681153ebf40bdb06 Mon Sep 17 00:00:00 2001
From: "sds.rs" <jack.sds5889@gmail.com>
Date: Sun, 10 May 2026 05:07:34 +0800
Subject: [PATCH] refactor(indexer): split pipeline.rs into 7-module
 per-concern tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

src/indexer/pipeline.rs (2374 lines) → src/indexer/pipeline/{mod,embed,
context,python_modules,resolve,index_files,tests}.rs. mod.rs (237 lines)
keeps the public entry points (run_full_index, ensure_file_indexed,
run_incremental_index{,_cached}) + IndexStats/IndexResult/ProgressFn +
collect_dirty_node_ids glue. The Phase-0..3 orchestrator stays whole in
index_files.rs (827 lines) — its phases share local transaction/atomics/
batch_parsed/name_to_ids state that splitting would have to thread back
in via large arg lists.

Per-concern submodules:
  embed.rs          (71)  — embed_and_store_batch + sequential fallback
  context.rs        (197) — categorize_edges + format_route_from_metadata
                            + regenerate_context_strings + repair_null_*
  python_modules.rs (73)  — build_python_module_map + resolve_python_*
  resolve.rs        (204) — refine_ambiguous_targets + resolve_pending_calls
  index_files.rs    (827) — Phase 0..3 orchestrator + FileIndexed
  tests.rs          (884) — all #[cfg(test)] tests

Public surface preserved (`crate::indexer::pipeline::{run_full_index,
ensure_file_indexed, run_incremental_index, run_incremental_index_cached,
embed_and_store_batch, repair_null_context_strings, IndexStats,
IndexResult, ProgressFn}`). External callers in cli.rs / mcp/server /
tests / benches / claude-plugin all keep their imports unchanged.

Verification:
- cargo check: clean
- cargo +1.95.0 clippy --no-default-features -- -D warnings: clean
- cargo +1.95.0 clippy --all-targets -- -D warnings: clean
- cargo test --release: 292 lib + 6 + 44 + 19 + 6 + 54 = 421 tests, 0 failed
  (1 pre-existing #[ignore])

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/indexer/pipeline.rs                | 2374 ------------------------
 src/indexer/pipeline/context.rs        |  197 ++
 src/indexer/pipeline/embed.rs          |   71 +
 src/indexer/pipeline/index_files.rs    |  827 +++++++++
 src/indexer/pipeline/mod.rs            |  237 +++
 src/indexer/pipeline/python_modules.rs |   73 +
 src/indexer/pipeline/resolve.rs        |  204 ++
 src/indexer/pipeline/tests.rs          |  884 +++++++++
 8 files changed, 2493 insertions(+), 2374 deletions(-)
 delete mode 100644 src/indexer/pipeline.rs
 create mode 100644 src/indexer/pipeline/context.rs
 create mode 100644 src/indexer/pipeline/embed.rs
 create mode 100644 src/indexer/pipeline/index_files.rs
 create mode 100644 src/indexer/pipeline/mod.rs
 create mode 100644 src/indexer/pipeline/python_modules.rs
 create mode 100644 src/indexer/pipeline/resolve.rs
 create mode 100644 src/indexer/pipeline/tests.rs

diff --git a/src/indexer/pipeline.rs b/src/indexer/pipeline.rs
deleted file mode 100644
index 3bcd780..0000000
--- a/src/indexer/pipeline.rs
+++ /dev/null
@@ -1,2374 +0,0 @@
-use anyhow::Result;
-use std::collections::{HashMap, HashSet};
-use std::path::Path;
-
-use rayon::prelude::*;
-
-use crate::embedding::context::{build_context_string, NodeContext};
-use crate::embedding::model::EmbeddingModel;
-use crate::indexer::merkle::{compute_diff, hash_file, scan_directory, scan_directory_cached, DirectoryCache};
-use crate::parser::relations::extract_relations_from_tree;
-use crate::parser::treesitter::{parse_tree, extract_nodes_from_tree};
-use crate::search::tokenizer::split_identifier;
-use crate::storage::db::Database;
-use crate::storage::queries::{
-    delete_files_by_paths, delete_nodes_by_file,
-    get_all_file_hashes, get_all_node_names_with_ids, get_dirty_node_ids, get_edges_batch,
-    get_inbound_cross_file_edges,
-    get_nodes_by_file_path,
-    get_nodes_missing_context, get_nodes_with_files_by_ids,
-    insert_edge_cached, insert_node_cached,
-    insert_node_vectors_batch, update_context_strings_batch, upsert_file,
-    EdgeInfo, FileRecord, NodeRecord, NodeResult,
-};
-use crate::domain::{REL_CALLS, REL_IMPORTS, REL_INHERITS, REL_ROUTES_TO, REL_IMPLEMENTS, REL_EXPORTS, max_file_size, CROSS_FILE_CALL_NOISE};
-use crate::utils::config::detect_language;
-
-/// Counters for indexing observability — tracks skipped items.
-#[derive(Debug, Clone, Default)]
-pub struct IndexStats {
-    pub files_skipped_size: usize,
-    pub files_skipped_parse: usize,
-    pub files_skipped_read: usize,
-    pub files_skipped_hash: usize,
-    pub files_skipped_language: usize,
-}
-
-pub struct IndexResult {
-    pub files_indexed: usize,
-    pub nodes_created: usize,
-    pub edges_created: usize,
-    pub stats: IndexStats,
-}
-
-/// Progress callback: called with (files_done, files_total) after each batch.
-pub type ProgressFn<'a> = &'a dyn Fn(usize, usize);
-
-/// Extract "METHOD path" from route edge metadata JSON, falling back to the edge name.
-fn format_route_from_metadata(metadata: Option<&str>, name: &str) -> String {
-    if let Some(meta) = metadata {
-        if let Ok(v) = serde_json::from_str::<serde_json::Value>(meta) {
-            let method = v["method"].as_str().unwrap_or("ALL");
-            if let Some(path) = v["path"].as_str() {
-                return format!("{} {}", method, path);
-            }
-        }
-    }
-    name.to_string()
-}
-
-/// Embed context strings using batched inference and batch-insert vectors.
-/// Public so the background embedding thread in server.rs can call it.
-/// Wraps vector inserts in a transaction for atomicity and performance.
-pub fn embed_and_store_batch(db: &Database, model: &EmbeddingModel, context_updates: &[(i64, String)]) -> Result<()> {
-    if context_updates.is_empty() {
-        return Ok(());
-    }
-
-    let t0 = std::time::Instant::now();
-    let texts: Vec<&str> = context_updates.iter().map(|(_, ctx)| ctx.as_str()).collect();
-    let ids: Vec<i64> = context_updates.iter().map(|(id, _)| *id).collect();
-
-    let embeddings = match model.embed_batch(&texts) {
-        Ok(embs) => embs,
-        Err(e) => {
-            tracing::warn!("Batch embed failed, falling back to sequential: {}", e);
-            // Fallback: sequential embed
-            let mut embs = Vec::new();
-            for (i, text) in texts.iter().enumerate() {
-                match model.embed(text) {
-                    Ok(emb) => embs.push(Some(emb)),
-                    Err(e2) => {
-                        tracing::warn!("Failed to embed node {}: {}", ids[i], e2);
-                        embs.push(None);
-                    }
-                }
-            }
-            let vectors: Vec<(i64, Vec<f32>)> = ids.iter().zip(embs)
-                .filter_map(|(&id, emb)| emb.map(|e| (id, e)))
-                .collect();
-            if !vectors.is_empty() {
-                let tx = db.conn().unchecked_transaction()?;
-                insert_node_vectors_batch(db.conn(), &vectors)?;
-                tx.commit()?;
-            }
-            tracing::info!("[embed] {} nodes (sequential fallback) in {:.1}s",
-                context_updates.len(), t0.elapsed().as_secs_f64());
-            return Ok(());
-        }
-    };
-
-    let vectors: Vec<(i64, Vec<f32>)> = ids.into_iter().zip(embeddings).collect();
-    let t_embed = t0.elapsed();
-
-    if !vectors.is_empty() {
-        let tx = db.conn().unchecked_transaction()?;
-        insert_node_vectors_batch(db.conn(), &vectors)?;
-        tx.commit()?;
-    }
-
-    tracing::info!("[embed] {} nodes in {:.1}s (embed {:.1}s, store {:.1}s)",
-        context_updates.len(),
-        t0.elapsed().as_secs_f64(),
-        t_embed.as_secs_f64(),
-        (t0.elapsed() - t_embed).as_secs_f64(),
-    );
-    Ok(())
-}
-
-struct CategorizedEdges {
-    callees: Vec<String>,
-    callers: Vec<String>,
-    inherits: Vec<String>,
-    routes: Vec<String>,
-    imports: Vec<String>,
-    implements: Vec<String>,
-    exports: Vec<String>,
-}
-
-fn categorize_edges(edges: Option<&Vec<EdgeInfo>>, format_route: impl Fn(Option<&str>, &str) -> String) -> CategorizedEdges {
-    let mut result = CategorizedEdges {
-        callees: Vec::new(),
-        callers: Vec::new(),
-        inherits: Vec::new(),
-        routes: Vec::new(),
-        imports: Vec::new(),
-        implements: Vec::new(),
-        exports: Vec::new(),
-    };
-    if let Some(edge_list) = edges {
-        for (relation, direction, name, metadata) in edge_list {
-            match (relation.as_str(), direction.as_str()) {
-                (rel, "out") if rel == REL_CALLS => result.callees.push(name.clone()),
-                (rel, "in") if rel == REL_CALLS => result.callers.push(name.clone()),
-                (rel, "out") if rel == REL_INHERITS => result.inherits.push(name.clone()),
-                (rel, "out") if rel == REL_ROUTES_TO => {
-                    result.routes.push(format_route(metadata.as_deref(), name));
-                }
-                (rel, "out") if rel == REL_IMPORTS => result.imports.push(name.clone()),
-                (rel, "out") if rel == REL_IMPLEMENTS => result.implements.push(name.clone()),
-                (rel, "out") if rel == REL_EXPORTS => result.exports.push(name.clone()),
-                _ => {}
-            }
-        }
-    }
-    result
-}
-
-pub fn run_full_index(db: &Database, project_root: &Path, model: Option<&EmbeddingModel>, progress: Option<ProgressFn>) -> Result<IndexResult> {
-    let current_hashes = scan_directory(project_root)?;
-    let files: Vec<String> = current_hashes.keys().cloned().collect();
-    index_files(db, project_root, &files, &current_hashes, model, &[], progress)
-}
-
-/// Reindex a single file when its on-disk hash differs from the stored hash.
-/// No-op when the hashes match (or `rel_path` was never indexed in a way that
-/// would currently reindex it). Returns true when a reindex (or stale-row
-/// cleanup) actually fired.
-///
-/// Used by query-time freshness: when an MCP tool receives an explicit
-/// `file_path` argument, the agent is signaling "I just edited this; please
-/// answer against the current bytes." The 30s `last_incremental_check`
-/// debounce in the server is too coarse for tight Edit→search loops.
-///
-/// Cross-file dirty-edge handling mirrors `run_incremental_index`: collect
-/// dirty node IDs **before** re-indexing (cascade delete strips old edges),
-/// then regenerate context strings + embeddings once the new nodes exist.
-pub fn ensure_file_indexed(
-    db: &Database,
-    project_root: &Path,
-    rel_path: &str,
-    model: Option<&EmbeddingModel>,
-) -> Result<bool> {
-    let abs_path = project_root.join(rel_path);
-
-    // Missing-file path: drop stale row so future queries don't return phantom nodes.
-    if !abs_path.is_file() {
-        let exists_in_db: Option<i64> = db.conn().query_row(
-            "SELECT id FROM files WHERE path = ?1",
-            [rel_path],
-            |row| row.get(0),
-        ).ok();
-        if exists_in_db.is_some() {
-            let tx = db.conn().unchecked_transaction()?;
-            delete_files_by_paths(db.conn(), &[rel_path.to_string()])?;
-            tx.commit()?;
-            return Ok(true);
-        }
-        return Ok(false);
-    }
-
-    // Skip files we wouldn't index in the first place (binary / wrong language).
-    if crate::utils::config::detect_language(rel_path).is_none() {
-        return Ok(false);
-    }
-
-    let on_disk_hash = crate::indexer::merkle::hash_file(&abs_path)?;
-    let stored_hash: Option<String> = db.conn().query_row(
-        "SELECT blake3_hash FROM files WHERE path = ?1",
-        [rel_path],
-        |row| row.get(0),
-    ).ok();
-
-    if stored_hash.as_deref() == Some(&on_disk_hash) {
-        return Ok(false);
-    }
-
-    // Cross-file edges into this file's nodes need their context strings rebuilt
-    // *after* the node IDs are replaced — capture the dirty set BEFORE re-indexing.
-    let dirty_node_ids = collect_dirty_node_ids(db, std::slice::from_ref(&rel_path.to_string()))?;
-
-    let mut hashes: HashMap<String, String> = HashMap::new();
-    hashes.insert(rel_path.to_string(), on_disk_hash);
-    let files = vec![rel_path.to_string()];
-    index_files(db, project_root, &files, &hashes, model, &[], None)?;
-
-    if !dirty_node_ids.is_empty() {
-        regenerate_context_strings(db, &dirty_node_ids, model)?;
-    }
-    Ok(true)
-}
-
-pub fn run_incremental_index(db: &Database, project_root: &Path, model: Option<&EmbeddingModel>, progress: Option<ProgressFn>) -> Result<IndexResult> {
-    let start = std::time::Instant::now();
-    let stored_hashes = get_all_file_hashes(db.conn())?;
-    let current_hashes = scan_directory(project_root)?;
-    let diff = compute_diff(&stored_hashes, &current_hashes);
-
-    // Preserve <external> pseudo-file across incremental indexes
-    let deleted_files: Vec<String> = diff.deleted_files.into_iter()
-        .filter(|p| p != "<external>")
-        .collect();
-    let to_index: Vec<String> = [diff.new_files, diff.changed_files].concat();
-
-    let dirty_node_ids = if !to_index.is_empty() {
-        collect_dirty_node_ids(db, &to_index)?
-    } else {
-        HashSet::new()
-    };
-
-    let result = index_files(db, project_root, &to_index, &current_hashes, model, &deleted_files, progress)?;
-
-    if !dirty_node_ids.is_empty() {
-        regenerate_context_strings(db, &dirty_node_ids, model)?;
-    }
-
-    if result.files_indexed > 0 || !deleted_files.is_empty() {
-        tracing::info!(
-            "[incremental] {} files changed, {} deleted, {} nodes, {} edges, {:.1}s",
-            result.files_indexed, deleted_files.len(),
-            result.nodes_created, result.edges_created,
-            start.elapsed().as_secs_f64()
-        );
-    }
-
-    Ok(result)
-}
-
-/// Incremental index with directory mtime cache for faster scanning.
-/// Files in unchanged directories are skipped entirely.
-pub fn run_incremental_index_cached(
-    db: &Database,
-    project_root: &Path,
-    model: Option<&EmbeddingModel>,
-    dir_cache: Option<&DirectoryCache>,
-    progress: Option<ProgressFn>,
-) -> Result<(IndexResult, DirectoryCache)> {
-    let start = std::time::Instant::now();
-    let stored_hashes = get_all_file_hashes(db.conn())?;
-    let (mut current_hashes, new_cache) = scan_directory_cached(project_root, dir_cache)?;
-
-    // Merge stored hashes for files in unchanged directories.
-    // scan_directory_cached skips files in unchanged dirs, so we need to
-    // carry forward their stored hashes to prevent false "deleted" diffs.
-    // Use new_cache.file_mtimes (populated for ALL walked files) to check existence
-    // without per-file stat calls.
-    for (path, hash) in &stored_hashes {
-        if !current_hashes.contains_key(path) && new_cache.file_exists(path) {
-            current_hashes.insert(path.clone(), hash.clone());
-        }
-    }
-
-    let diff = compute_diff(&stored_hashes, &current_hashes);
-
-    // Preserve <external> pseudo-file across incremental indexes
-    let deleted_files: Vec<String> = diff.deleted_files.into_iter()
-        .filter(|p| p != "<external>")
-        .collect();
-    let to_index: Vec<String> = [diff.new_files, diff.changed_files].concat();
-
-    let dirty_node_ids = if !to_index.is_empty() {
-        collect_dirty_node_ids(db, &to_index)?
-    } else {
-        HashSet::new()
-    };
-
-    let result = index_files(db, project_root, &to_index, &current_hashes, model, &deleted_files, progress)?;
-
-    if !dirty_node_ids.is_empty() {
-        regenerate_context_strings(db, &dirty_node_ids, model)?;
-    }
-
-    if result.files_indexed > 0 || !deleted_files.is_empty() {
-        tracing::info!(
-            "[incremental] {} files changed, {} deleted, {} nodes, {} edges, {:.1}s",
-            result.files_indexed, deleted_files.len(),
-            result.nodes_created, result.edges_created,
-            start.elapsed().as_secs_f64()
-        );
-    }
-
-    Ok((result, new_cache))
-}
-
-/// Collect node IDs in OTHER files that have edges pointing to nodes in the changed files.
-/// Must be called BEFORE re-indexing (cascade delete removes old edges).
-fn collect_dirty_node_ids(db: &Database, changed_paths: &[String]) -> Result<HashSet<i64>> {
-    let mut changed_file_ids = Vec::new();
-    for path in changed_paths {
-        let file_id: Option<i64> = db.conn().query_row(
-            "SELECT id FROM files WHERE path = ?1",
-            [path],
-            |row| row.get(0),
-        ).ok();
-        if let Some(id) = file_id {
-            changed_file_ids.push(id);
-        }
-    }
-    let ids = get_dirty_node_ids(db.conn(), &changed_file_ids)?;
-    Ok(ids.into_iter().collect())
-}
-
-/// Regenerate context strings (and embeddings) for the given set of dirty nodes.
-fn regenerate_context_strings(db: &Database, dirty_ids: &HashSet<i64>, model: Option<&EmbeddingModel>) -> Result<()> {
-    let tx = db.conn().unchecked_transaction()?;
-    let id_vec: Vec<i64> = dirty_ids.iter().copied().collect();
-    let all_edges = get_edges_batch(db.conn(), &id_vec)?;
-    let all_nodes: HashMap<i64, (NodeResult, String, Option<String>)> = {
-        let nwfs = get_nodes_with_files_by_ids(db.conn(), &id_vec)?;
-        nwfs.into_iter().map(|nwf| (nwf.node.id, (nwf.node, nwf.file_path, nwf.language))).collect()
-    };
-
-    // Build all context strings first
-    let mut context_updates: Vec<(i64, String)> = Vec::with_capacity(dirty_ids.len());
-    for &node_id in dirty_ids {
-        if let Some((node, file_path, language)) = all_nodes.get(&node_id) {
-            let edges = all_edges.get(&node_id);
-            let cat = categorize_edges(edges, format_route_from_metadata);
-
-            let ctx = build_context_string(&NodeContext {
-                node_type: node.node_type.clone(),
-                name: node.name.clone(),
-                qualified_name: node.qualified_name.clone(),
-                file_path: file_path.clone(),
-                language: language.clone(),
-                signature: node.signature.clone(),
-                return_type: node.return_type.clone(),
-                param_types: node.param_types.clone(),
-                code_content: Some(node.code_content.clone()),
-                routes: cat.routes,
-                callees: cat.callees,
-                callers: cat.callers,
-                inherits: cat.inherits,
-                imports: cat.imports,
-                implements: cat.implements,
-                exports: cat.exports,
-                doc_comment: node.doc_comment.clone(),
-            });
-
-            context_updates.push((node_id, ctx));
-        }
-    }
-
-    // Batch update context strings
-    update_context_strings_batch(db.conn(), &context_updates)?;
-    tx.commit()?;
-
-    // Embed outside the committed tx — recoverable on failure
-    if let Some(m) = model {
-        if db.vec_enabled() {
-            embed_and_store_batch(db, m, &context_updates)?;
-        }
-    }
-    Ok(())
-}
-
-/// Repair nodes that have NULL context_string (likely from a failed Phase 3).
-/// This is called at startup after index verification.
-pub fn repair_null_context_strings(
-    db: &Database,
-    model: Option<&EmbeddingModel>,
-) -> Result<usize> {
-    let missing_ids = get_nodes_missing_context(db.conn())?;
-    if missing_ids.is_empty() {
-        return Ok(0);
-    }
-
-    tracing::info!("[repair] Found {} nodes with NULL context_string, rebuilding...", missing_ids.len());
-
-    // Load node details with file paths
-    let nodes_with_files = get_nodes_with_files_by_ids(db.conn(), &missing_ids)?;
-
-    // Load edges for all affected nodes in one batch
-    let all_edges = get_edges_batch(db.conn(), &missing_ids)?;
-
-    // Build context strings
-    let mut context_updates: Vec<(i64, String)> = Vec::new();
-    for nwf in &nodes_with_files {
-        let node = &nwf.node;
-        let edges = all_edges.get(&node.id);
-        let cat = categorize_edges(edges, format_route_from_metadata);
-
-        let ctx = build_context_string(&NodeContext {
-            node_type: node.node_type.clone(),
-            name: node.name.clone(),
-            qualified_name: node.qualified_name.clone(),
-            file_path: nwf.file_path.clone(),
-            language: nwf.language.clone(),
-            signature: node.signature.clone(),
-            return_type: node.return_type.clone(),
-            param_types: node.param_types.clone(),
-            code_content: Some(node.code_content.clone()),
-            routes: cat.routes,
-            callees: cat.callees,
-            callers: cat.callers,
-            inherits: cat.inherits,
-            imports: cat.imports,
-            implements: cat.implements,
-            exports: cat.exports,
-            doc_comment: node.doc_comment.clone(),
-        });
-
-        context_updates.push((node.id, ctx));
-    }
-
-    // Update in DB within a transaction (avoids per-row fsync under autocommit)
-    if !context_updates.is_empty() {
-        let tx = db.conn().unchecked_transaction()?;
-        update_context_strings_batch(db.conn(), &context_updates)?;
-        tx.commit()?;
-
-        // Re-embed if model available
-        if let Some(m) = model {
-            if db.vec_enabled() {
-                embed_and_store_batch(db, m, &context_updates)?;
-            }
-        }
-    }
-
-    let count = context_updates.len();
-    tracing::info!("[repair] Repaired context strings for {} nodes", count);
-    Ok(count)
-}
-
-/// Batch size for streaming indexing. Each batch processes Phase 1+2
-/// then drops heavyweight data (ASTs, source strings) before the next batch.
-const BATCH_SIZE: usize = 500;
-
-/// Lightweight post-batch record — no Tree or source string.
-struct FileIndexed {
-    rel_path: String,
-    node_ids: Vec<i64>,
-    node_names: Vec<String>,
-}
-
-/// Build mapping from Python dotted module paths to file paths.
-/// Registers both full paths and suffix paths for flexible matching.
-/// e.g., "src/myapp/utils.py" matches "src.myapp.utils", "myapp.utils", and "utils".
-fn build_python_module_map(python_paths: &HashSet<String>) -> HashMap<String, Vec<String>> {
-    let mut map: HashMap<String, Vec<String>> = HashMap::new();
-    for path in python_paths {
-        let stripped = if let Some(s) = path.strip_suffix("/__init__.py") {
-            s
-        } else if let Some(s) = path.strip_suffix(".py") {
-            s
-        } else {
-            continue;
-        };
-
-        // Register all suffix module paths for flexible matching
-        // e.g., "src/myapp/utils" -> "src.myapp.utils", "myapp.utils", "utils"
-        let parts: Vec<&str> = stripped.split('/').collect();
-        for i in 0..parts.len() {
-            let dotted = parts[i..].join(".");
-            map.entry(dotted).or_default().push(path.clone());
-        }
-    }
-    // Deduplicate
-    for paths in map.values_mut() {
-        paths.sort();
-        paths.dedup();
-    }
-    map
-}
-
-/// Resolve Python import targets using pre-parsed module metadata.
-/// For `import X` (is_module_import): finds `<module>` nodes in resolved files.
-/// For `from X import Y`: finds nodes named Y only in resolved files.
-/// Returns None if module can't be resolved or no matching nodes found.
-fn resolve_python_module_targets(
-    python_module: &str,
-    is_module_import: bool,
-    target_name: &str,
-    python_module_map: &HashMap<String, Vec<String>>,
-    node_id_to_path: &HashMap<i64, String>,
-    name_to_ids: &HashMap<String, Vec<i64>>,
-) -> Option<Vec<i64>> {
-    // Resolve module path to file path(s).
-    // Note: suffix matching in python_module_map means `import utils` may match
-    // multiple files (e.g., "myapp/utils.py" and "other/utils.py"). This is an
-    // inherent ambiguity without sys.path context; over-connecting is safer for
-    // dependency analysis than missing real dependencies.
-    let module_files = python_module_map.get(python_module)?;
-
-    let lookup_name = if is_module_import { "<module>" } else { target_name };
-    let all_ids = name_to_ids.get(lookup_name)?;
-    let targets: Vec<i64> = all_ids.iter()
-        .filter(|nid| {
-            node_id_to_path.get(nid)
-                .map(|p| module_files.contains(p))
-                .unwrap_or(false)
-        })
-        .copied()
-        .collect();
-    if targets.is_empty() { None } else { Some(targets) }
-}
-
-fn index_files(
-    db: &Database,
-    root: &Path,
-    files: &[String],
-    hashes: &HashMap<String, String>,
-    model: Option<&EmbeddingModel>,
-    delete_paths: &[String],
-    progress: Option<ProgressFn>,
-) -> Result<IndexResult> {
-    // SAFETY: unchecked_transaction is used because rusqlite's Transaction borrows
-    // &mut Connection, preventing other borrows during the transaction. Here we need
-    // both the transaction and read access via db.conn() (which returns &Connection
-    // to the same underlying connection). This is safe because:
-    // (1) db.conn() returns the same Connection the tx was opened on,
-    // (2) we never open nested transactions,
-    // (3) concurrent access (e.g. background embedding thread) uses separate
-    //     DB connections; safety relies on SQLite WAL mode + busy_timeout(5000),
-    //     not single-threadedness.
-
-    use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
-    let skipped_size = AtomicUsize::new(0);
-    let skipped_parse = AtomicUsize::new(0);
-    let skipped_read = AtomicUsize::new(0);
-    let skipped_hash = AtomicUsize::new(0);
-    let skipped_language = AtomicUsize::new(0);
-
-    let mut total_nodes_created = 0usize;
-    let mut total_edges_created = 0usize;
-    let mut all_indexed: Vec<FileIndexed> = Vec::new();
-
-    // Phase 0: Delete removed files in own transaction.
-    //
-    // Before cascade strips inbound REL_CALLS edges, capture them as pending
-    // rows. Without this, deleting file A wipes B's edge to A.foo and B is
-    // not in `delete_paths` (so Phase 2 won't re-extract it), leaving B with
-    // neither an edge nor a pending row — the same staleness window the
-    // "callee added later" buffering closes, just from the deletion side.
-    // Both directions need to round-trip through pending or the v0.18.2 fix
-    // is only half-complete.
-    if !delete_paths.is_empty() {
-        let tx = db.conn().unchecked_transaction()?;
-
-        // Resolve file IDs once (delete_files_by_paths drops them) so we can
-        // query inbound calls before cascade fires.
-        let mut deleted_file_ids: Vec<i64> = Vec::with_capacity(delete_paths.len());
-        for path in delete_paths {
-            if let Ok(Some(fid)) = db.conn().query_row(
-                "SELECT id FROM files WHERE path = ?1",
-                [path],
-                |row| row.get::<_, Option<i64>>(0),
-            ) {
-                deleted_file_ids.push(fid);
-            }
-        }
-
-        let mut buffered = 0usize;
-        for fid in &deleted_file_ids {
-            let inbound = crate::storage::queries::get_inbound_calls_for_pending(db.conn(), *fid)?;
-            for (source_id, target_name, source_language, metadata) in inbound {
-                crate::storage::queries::insert_pending_unresolved_call(
-                    db.conn(),
-                    source_id,
-                    &target_name,
-                    &source_language,
-                    metadata.as_deref(),
-                )?;
-                buffered += 1;
-            }
-        }
-        if buffered > 0 {
-            tracing::info!(
-                "[index] Phase 0: buffered {} inbound calls before cascade-deleting {} file(s)",
-                buffered, deleted_file_ids.len()
-            );
-        }
-
-        delete_files_by_paths(db.conn(), delete_paths)?;
-        tx.commit()?;
-    }
-
-    // CPU-bound parse result — produced in parallel, consumed sequentially for DB insert
-    struct FilePreParsed {
-        rel_path: String,
-        source: String,
-        language: String,
-        tree: tree_sitter::Tree,
-        hash: String,
-        last_modified: i64,
-        parsed_nodes: Vec<crate::parser::treesitter::ParsedNode>,
-    }
-
-    // Pre-build Python module map once (used in all batches for import resolution)
-    let mut all_python_paths: HashSet<String> = files.iter()
-        .filter(|f| f.ends_with(".py"))
-        .cloned()
-        .collect();
-    {
-        let mut stmt = db.conn().prepare("SELECT path FROM files WHERE path LIKE '%.py'")?;
-        let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
-        for row in rows {
-            all_python_paths.insert(row?);
-        }
-    }
-    let python_module_map = build_python_module_map(&all_python_paths);
-
-    // Pre-load global name->[(id, path, language)] map once before the batch loop.
-    // This avoids a full table scan per batch in Phase 2 relation resolution.
-    // The map is updated incrementally as each batch commits new nodes.
-    // `language` drives same-language-preferred resolution to avoid cross-language
-    // bare-name collisions (e.g. Rust `hasher.update()` resolving to JS `function update`).
-    let mut global_name_map: HashMap<String, Vec<crate::storage::queries::NameEntry>> =
-        get_all_node_names_with_ids(db.conn())?;
-
-    // Heavyweight per-file data used during Phase 1+2, dropped after each batch
-    #[allow(dead_code)]
-    struct FileParsed {
-        rel_path: String,
-        source: String,
-        language: String,
-        tree: tree_sitter::Tree,
-        file_id: i64,
-        node_ids: Vec<i64>,
-        node_names: Vec<String>,
-    }
-
-    // Process files in batches — each batch does Phase 1 + Phase 2
-    for batch in files.chunks(BATCH_SIZE) {
-        let tx = db.conn().unchecked_transaction()?;
-
-        // --- Phase 1a: Parallel CPU-bound work (read + parse + extract nodes) ---
-        let pre_parsed: Vec<FilePreParsed> = batch
-            .par_iter()
-            .filter_map(|rel_path| {
-                let language = match detect_language(rel_path) {
-                    Some(l) => l,
-                    None => {
-                        skipped_language.fetch_add(1, AtomicOrdering::Relaxed);
-                        return None;
-                    }
-                };
-                let abs_path = root.join(rel_path);
-
-                let file_meta = std::fs::metadata(&abs_path).ok();
-                if let Some(ref meta) = file_meta {
-                    if meta.len() > max_file_size() {
-                        tracing::debug!("Skipping large file ({} bytes): {}", meta.len(), rel_path);
-                        skipped_size.fetch_add(1, AtomicOrdering::Relaxed);
-                        return None;
-                    }
-                }
-
-                let source = match std::fs::read_to_string(&abs_path) {
-                    Ok(s) => s,
-                    Err(e) => {
-                        tracing::warn!("Skipping file {}: {}", rel_path, e);
-                        skipped_read.fetch_add(1, AtomicOrdering::Relaxed);
-                        return None;
-                    }
-                };
-
-                let hash = match hashes.get(rel_path.as_str()) {
-                    Some(h) => h.clone(),
-                    None => match hash_file(&abs_path) {
-                        Ok(h) => h,
-                        Err(e) => {
-                            tracing::warn!("Skipping file (hash error): {}: {}", rel_path, e);
-                            skipped_hash.fetch_add(1, AtomicOrdering::Relaxed);
-                            return None;
-                        }
-                    },
-                };
-
-                let tree = match parse_tree(&source, language) {
-                    Ok(t) => t,
-                    Err(e) => {
-                        tracing::warn!("Parse failed for {}: {}", rel_path, e);
-                        skipped_parse.fetch_add(1, AtomicOrdering::Relaxed);
-                        return None;
-                    }
-                };
-
-                let last_modified = file_meta
-                    .and_then(|m| m.modified().ok())
-                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
-                    .map(|d| d.as_secs() as i64)
-                    .unwrap_or(0);
-
-                let parsed_nodes = extract_nodes_from_tree(&tree, &source, language);
-
-                Some(FilePreParsed {
-                    rel_path: rel_path.clone(),
-                    source,
-                    language: language.to_string(),
-                    tree,
-                    hash,
-                    last_modified,
-                    parsed_nodes,
-                })
-            })
-            .collect();
-
-        let mut batch_parsed: Vec<FileParsed> = Vec::new();
-        // Saved inbound edges from other files → batch files (to restore after cascade delete)
-        // Tuple: (source_id, source_file_id, target_name, relation, metadata)
-        let mut saved_inbound_edges: Vec<(i64, i64, String, String, Option<String>)> = Vec::new();
-        // Track file_ids in this batch to filter intra-batch edges in Phase 2c
-        let mut batch_file_ids: HashSet<i64> = HashSet::new();
-
-        // --- Phase 1b: Sequential DB inserts ---
-        for pp in pre_parsed {
-            let file_id = upsert_file(db.conn(), &FileRecord {
-                path: pp.rel_path.clone(),
-                blake3_hash: pp.hash,
-                last_modified: pp.last_modified,
-                language: Some(pp.language.clone()),
-            })?;
-
-            // Save cross-file inbound edges before cascade delete destroys them
-            saved_inbound_edges.extend(get_inbound_cross_file_edges(db.conn(), file_id)?);
-            batch_file_ids.insert(file_id);
-
-            delete_nodes_by_file(db.conn(), file_id)?;
-
-            let mut node_ids = Vec::new();
-            let mut node_names = Vec::new();
-
-            let module_node_id = insert_node_cached(db.conn(), &NodeRecord {
-                file_id,
-                node_type: "module".into(),
-                name: "<module>".into(),
-                qualified_name: Some(pp.rel_path.clone()),
-                start_line: 1,
-                end_line: pp.source.lines().count() as i64,
-                code_content: String::new(),
-                signature: None,
-                doc_comment: None,
-                context_string: None,
-                name_tokens: None,
-                return_type: None,
-                param_types: None,
-                is_test: false,
-            })?;
-            node_ids.push(module_node_id);
-            node_names.push("<module>".into());
-            total_nodes_created += 1;
-
-            for pn in &pp.parsed_nodes {
-                let name_tokens = split_identifier(&pn.name);
-                let node_id = insert_node_cached(db.conn(), &NodeRecord {
-                    file_id,
-                    node_type: pn.node_type.clone(),
-                    name: pn.name.clone(),
-                    qualified_name: pn.qualified_name.clone(),
-                    start_line: pn.start_line as i64,
-                    end_line: pn.end_line as i64,
-                    code_content: pn.code_content.clone(),
-                    signature: pn.signature.clone(),
-                    doc_comment: pn.doc_comment.clone(),
-                    context_string: None,
-                    name_tokens: Some(name_tokens),
-                    return_type: pn.return_type.clone(),
-                    param_types: pn.param_types.clone(),
-                    is_test: pn.is_test,
-                })?;
-                node_ids.push(node_id);
-                node_names.push(pn.name.clone());
-                total_nodes_created += 1;
-            }
-
-            batch_parsed.push(FileParsed {
-                rel_path: pp.rel_path,
-                source: pp.source,
-                language: pp.language,
-                tree: pp.tree,
-                file_id,
-                node_ids,
-                node_names,
-            });
-        }
-
-        // --- Phase 2: Extract relations + insert edges ---
-        // Build per-batch name_to_ids and node_id_to_path from the pre-loaded global map,
-        // excluding files in the current batch (their old nodes were deleted in Phase 1b).
-        let batch_file_paths: HashSet<&str> = batch_parsed.iter()
-            .map(|pf| pf.rel_path.as_str()).collect();
-
-        let mut name_to_ids: HashMap<String, Vec<i64>> = HashMap::new();
-        let mut node_id_to_path: HashMap<i64, String> = HashMap::new();
-        // Per-node language for same-language-preferred edge resolution (§ cross-lang collision).
-        let mut node_id_to_language: HashMap<i64, Option<String>> = HashMap::new();
-
-        // Add current batch's newly inserted nodes
-        for pf in &batch_parsed {
-            for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
-                name_to_ids.entry(name.clone()).or_default().push(*id);
-                node_id_to_path.insert(*id, pf.rel_path.clone());
-                node_id_to_language.insert(*id, Some(pf.language.clone()));
-            }
-        }
-
-        // Add nodes from the global map, excluding those in current batch's files
-        // (their old nodes were deleted and replaced by new ones above)
-        for (name, entries) in &global_name_map {
-            for (id, path, language) in entries {
-                if !batch_file_paths.contains(path.as_str()) {
-                    name_to_ids.entry(name.clone()).or_default().push(*id);
-                    node_id_to_path.insert(*id, path.clone());
-                    node_id_to_language.insert(*id, language.clone());
-                }
-            }
-        }
-
-        for ids in name_to_ids.values_mut() {
-            ids.sort();
-            ids.dedup();
-        }
-
-        // Track unresolved external Python imports: (source_module_node_id, module_name)
-        let mut external_python_imports: Vec<(i64, String)> = Vec::new();
-        // Track unresolved external symbols for sentinel node creation:
-        // (source_id, target_name, relation) — e.g., implements edges to external traits
-        let mut unresolved_externals: Vec<(i64, String, String)> = Vec::new();
-
-        for pf in &batch_parsed {
-            let relations = extract_relations_from_tree(&pf.tree, &pf.source, &pf.language);
-            let local_ids: HashSet<i64> = pf.node_ids.iter().copied().collect();
-
-            for rel in &relations {
-                // Contract: extract_relations_from_tree stamps every relation with
-                // source_language equal to the language argument. The
-                // same-language resolution at line 811+ depends on it. Hard
-                // error instead of debug_assert so a parser regression fails
-                // loudly in release builds too (one string compare per
-                // relation is negligible against the SQL writes below).
-                if rel.source_language != pf.language {
-                    anyhow::bail!(
-                        "ParsedRelation.source_language ({}) does not match file language ({}); \
-                         parser regressed the source_language contract",
-                        rel.source_language, pf.language
-                    );
-                }
-
-                let source_ids = pf.node_names.iter()
-                    .zip(pf.node_ids.iter())
-                    .filter(|(name, _)| *name == &rel.source_name)
-                    .map(|(_, id)| *id)
-                    .collect::<Vec<_>>();
-
-                // Try Python module-constrained resolution for import edges
-                if rel.relation == REL_IMPORTS {
-                    if let Some(ref meta_str) = rel.metadata {
-                        if let Ok(meta) = serde_json::from_str::<serde_json::Value>(meta_str) {
-                            if let Some(python_module) = meta.get("python_module").and_then(|v| v.as_str()) {
-                                let is_module_import = meta.get("is_module_import")
-                                    .and_then(|v| v.as_bool()).unwrap_or(false);
-                                if python_module_map.contains_key(python_module) {
-                                    // Internal module — try constrained resolution
-                                    if let Some(module_targets) = resolve_python_module_targets(
-                                        python_module, is_module_import, &rel.target_name,
-                                        &python_module_map, &node_id_to_path, &name_to_ids,
-                                    ) {
-                                        for &src_id in &source_ids {
-                                            for &tgt_id in &module_targets {
-                                                if src_id != tgt_id
-                                                    && insert_edge_cached(db.conn(), src_id, tgt_id, &rel.relation, rel.metadata.as_deref())? {
-                                                    total_edges_created += 1;
-                                                }
-                                            }
-                                        }
-                                        continue;
-                                    }
-                                    // Module found but symbol not found — fall through to default
-                                } else {
-                                    // External module — track for virtual node creation.
-                                    // For `from X import Y`, we track the module-level dependency (X),
-                                    // not the individual symbol (Y), since we can't index external code.
-                                    for &src_id in &source_ids {
-                                        external_python_imports.push((src_id, python_module.to_string()));
-                                    }
-                                    continue; // No point in default resolution for external imports
-                                }
-                            }
-                        }
-                    }
-                }
-
-                // Default resolution: global name-based lookup with language-aware layering.
-                // Tier order: same-file → same-language → (calls: drop) / (other: global).
-                // Dropping calls without a same-language match prevents Rust `hasher.update()`
-                // binding to an unrelated JS `function update()` via bare-name collision.
-                let all_target_ids = name_to_ids.get(&rel.target_name)
-                    .cloned()
-                    .unwrap_or_default();
-
-                let same_file_targets: Vec<i64> = all_target_ids.iter()
-                    .filter(|id| local_ids.contains(id))
-                    .copied()
-                    .collect();
-
-                let source_lang = pf.language.as_str();
-                let same_language_targets: Vec<i64> = all_target_ids.iter()
-                    .filter(|id| !local_ids.contains(id))
-                    .filter(|id| matches!(
-                        node_id_to_language.get(id).and_then(|l| l.as_deref()),
-                        Some(l) if l == source_lang
-                    ))
-                    .copied()
-                    .collect();
-
-                let target_ids = if !same_file_targets.is_empty() {
-                    same_file_targets
-                } else if rel.relation == REL_CALLS
-                    && CROSS_FILE_CALL_NOISE.contains(&rel.target_name.as_str())
-                {
-                    // Stdlib method names (new/default/from) — drop regardless of language.
-                    continue;
-                } else if !same_language_targets.is_empty() {
-                    // Ambiguous cross-file same-language candidates (e.g. a helper
-                    // name like `readJson` defined in multiple JS files) used to
-                    // fan out — every same-name target got an edge, producing
-                    // phantom callers across unrelated modules. Refine by
-                    // non-test preference + longest common path prefix with the
-                    // caller file. See `refine_ambiguous_targets` for fallback
-                    // policy (keeps remaining pool on ambiguity to avoid
-                    // regressing dead-code on bare-name Rust scoped calls).
-                    refine_ambiguous_targets(
-                        &same_language_targets,
-                        &pf.rel_path,
-                        &node_id_to_path,
-                    )
-                } else if rel.relation == REL_CALLS {
-                    // No same-file, no same-language candidate → buffer in
-                    // pending_unresolved_calls instead of silently dropping.
-                    // The post-Phase-2 sweep below promotes the row to a real
-                    // edge as soon as a same-language target appears (e.g.
-                    // sibling file added in a later incremental pass). Memory
-                    // `feedback_incremental_edge_timing.md` documented the bug
-                    // this closes: B's bare-name call to `foo()` got dropped
-                    // when foo didn't exist yet, and never re-resolved when A
-                    // later added `foo`. Schema cascade on source_id self-cleans
-                    // when callers are removed/reindexed.
-                    for &src_id in &source_ids {
-                        crate::storage::queries::insert_pending_unresolved_call(
-                            db.conn(),
-                            src_id,
-                            &rel.target_name,
-                            &pf.language,
-                            rel.metadata.as_deref(),
-                        )?;
-                    }
-                    continue;
-                } else {
-                    all_target_ids
-                };
-
-                if target_ids.is_empty()
-                    && (rel.relation == REL_IMPLEMENTS || rel.relation == REL_IMPORTS)
-                {
-                    // Unresolved implements target (external trait like Write, Default)
-                    // OR unresolved import target (JS `require('fs')`, unresolved JS
-                    // ES-import binding). Phase 2b-ext creates `<external>/<name>`
-                    // sentinel nodes so the dependency graph shows the link.
-                    for &src_id in &source_ids {
-                        unresolved_externals.push((src_id, rel.target_name.clone(), rel.relation.clone()));
-                    }
-                } else {
-                    for &src_id in &source_ids {
-                        for &tgt_id in &target_ids {
-                            if (src_id != tgt_id || rel.relation == REL_ROUTES_TO)
-                                && insert_edge_cached(db.conn(), src_id, tgt_id, &rel.relation, rel.metadata.as_deref())? {
-                                total_edges_created += 1;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        // Phase 2b: Create virtual nodes for external Python imports
-        if !external_python_imports.is_empty() {
-            let ext_file_id = upsert_file(db.conn(), &FileRecord {
-                path: "<external>".into(),
-                blake3_hash: "external".into(),
-                last_modified: 0,
-                language: Some("external".into()),
-            })?;
-
-            // Load existing external module nodes to avoid duplicates
-            let existing_ext_nodes: HashMap<String, i64> =
-                get_nodes_by_file_path(db.conn(), "<external>")?
-                    .into_iter()
-                    .map(|n| (n.name.clone(), n.id))
-                    .collect();
-
-            let unique_modules: HashSet<String> = external_python_imports.iter()
-                .map(|(_, m)| m.clone()).collect();
-
-            let mut ext_node_ids: HashMap<String, i64> = existing_ext_nodes;
-            for module_name in &unique_modules {
-                if !ext_node_ids.contains_key(module_name) {
-                    let node_id = insert_node_cached(db.conn(), &NodeRecord {
-                        file_id: ext_file_id,
-                        node_type: "external_module".into(),
-                        name: module_name.clone(),
-                        qualified_name: Some(format!("<external>/{}", module_name)),
-                        start_line: 0,
-                        end_line: 0,
-                        code_content: String::new(),
-                        signature: None,
-                        doc_comment: None,
-                        context_string: None,
-                        name_tokens: None,
-                        return_type: None,
-                        param_types: None,
-                        is_test: false,
-                    })?;
-                    ext_node_ids.insert(module_name.clone(), node_id);
-                    total_nodes_created += 1;
-                }
-            }
-
-            for (source_id, module_name) in &external_python_imports {
-                if let Some(&ext_id) = ext_node_ids.get(module_name) {
-                    if insert_edge_cached(db.conn(), *source_id, ext_id, REL_IMPORTS, None)? {
-                        total_edges_created += 1;
-                    }
-                }
-            }
-        }
-
-        // Phase 2b-ext: Create sentinel nodes for unresolved external symbols
-        // (e.g., Rust `impl Write for SharedStdout` where Write is from std::io)
-        if !unresolved_externals.is_empty() {
-            let ext_file_id = upsert_file(db.conn(), &FileRecord {
-                path: "<external>".into(),
-                blake3_hash: "external".into(),
-                last_modified: 0,
-                language: Some("external".into()),
-            })?;
-
-            let existing_ext_nodes: HashMap<String, i64> =
-                get_nodes_by_file_path(db.conn(), "<external>")?
-                    .into_iter()
-                    .map(|n| (n.name.clone(), n.id))
-                    .collect();
-
-            let mut ext_node_ids: HashMap<String, i64> = existing_ext_nodes;
-
-            // Collect unique targets with inferred type
-            let unique_targets: HashMap<&str, &str> = unresolved_externals.iter()
-                .map(|(_, name, rel)| {
-                    let node_type = if rel == REL_IMPLEMENTS { "trait" } else { "module" };
-                    (name.as_str(), node_type)
-                })
-                .collect();
-
-            for (&name, &node_type) in &unique_targets {
-                if !ext_node_ids.contains_key(name) {
-                    let node_id = insert_node_cached(db.conn(), &NodeRecord {
-                        file_id: ext_file_id,
-                        node_type: node_type.into(),
-                        name: name.into(),
-                        qualified_name: Some(format!("<external>/{}", name)),
-                        start_line: 0,
-                        end_line: 0,
-                        code_content: String::new(),
-                        signature: None,
-                        doc_comment: None,
-                        context_string: None,
-                        name_tokens: None,
-                        return_type: None,
-                        param_types: None,
-                        is_test: false,
-                    })?;
-                    ext_node_ids.insert(name.into(), node_id);
-                    total_nodes_created += 1;
-                }
-            }
-
-            for (source_id, target_name, relation) in &unresolved_externals {
-                if let Some(&ext_id) = ext_node_ids.get(target_name.as_str()) {
-                    if insert_edge_cached(db.conn(), *source_id, ext_id, relation, None)? {
-                        total_edges_created += 1;
-                    }
-                }
-            }
-        }
-
-        // Phase 2c: Restore cross-file inbound edges lost to cascade delete.
-        // When a file is re-indexed, its old nodes are deleted (cascade-deleting edges).
-        // Edges from OTHER files into the re-indexed file must be rebuilt using new node IDs.
-        if !saved_inbound_edges.is_empty() {
-            // Build name → new_node_id map for batch files only
-            let mut batch_name_to_ids: HashMap<&str, Vec<i64>> = HashMap::new();
-            for pf in &batch_parsed {
-                for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
-                    batch_name_to_ids.entry(name.as_str()).or_default().push(*id);
-                }
-            }
-
-            let mut restored = 0usize;
-            let mut skipped_intra_batch = 0usize;
-            for (source_id, source_file_id, target_name, relation, metadata) in &saved_inbound_edges {
-                // Source file is also in this batch — source_id is stale (deleted + re-created).
-                // Phase 2 already resolves cross-file edges for intra-batch files.
-                if batch_file_ids.contains(source_file_id) {
-                    skipped_intra_batch += 1;
-                    continue;
-                }
-                if let Some(new_target_ids) = batch_name_to_ids.get(target_name.as_str()) {
-                    for &new_tgt_id in new_target_ids {
-                        if *source_id != new_tgt_id
-                            && insert_edge_cached(db.conn(), *source_id, new_tgt_id, relation, metadata.as_deref())? {
-                            total_edges_created += 1;
-                            restored += 1;
-                        }
-                    }
-                }
-            }
-            if restored > 0 || skipped_intra_batch > 0 {
-                tracing::debug!("[index] Restored {} cross-file inbound edges, skipped {} intra-batch", restored, skipped_intra_batch);
-            }
-        }
-
-        tx.commit()?;
-
-        let batch_file_count = batch_parsed.len();
-
-        // Update global_name_map: remove old entries for batch files, add new ones
-        for (_, entries) in global_name_map.iter_mut() {
-            entries.retain(|(_id, path, _lang)| !batch_file_paths.contains(path.as_str()));
-        }
-        global_name_map.retain(|_, entries| !entries.is_empty());
-
-        // Convert to lightweight records — drops Tree and source string
-        for pf in batch_parsed {
-            // Add newly committed nodes to the global map
-            let pf_lang = Some(pf.language.clone());
-            for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
-                global_name_map.entry(name.clone())
-                    .or_default()
-                    .push((*id, pf.rel_path.clone(), pf_lang.clone()));
-            }
-            all_indexed.push(FileIndexed {
-                rel_path: pf.rel_path,
-                node_ids: pf.node_ids,
-                node_names: pf.node_names,
-            });
-            // pf.tree and pf.source are dropped here — memory freed
-        }
-
-        // Report progress after each batch
-        if let Some(cb) = progress {
-            cb(all_indexed.len(), files.len());
-        }
-
-        if files.len() > BATCH_SIZE {
-            tracing::info!(
-                "[index] batch {}/{}: {} files ({} nodes, {} edges)",
-                all_indexed.len(), files.len(),
-                batch_file_count, total_nodes_created, total_edges_created
-            );
-        }
-    }
-
-    // Phase 3: Build context strings + embeddings (single transaction, lightweight)
-    if !all_indexed.is_empty() {
-        let tx = db.conn().unchecked_transaction()?;
-        let all_node_ids: Vec<i64> = all_indexed.iter()
-            .flat_map(|fi| fi.node_ids.iter().copied()).collect();
-        let all_edges = get_edges_batch(db.conn(), &all_node_ids)?;
-        let all_node_details: HashMap<i64, (NodeResult, Option<String>)> = {
-            let nodes = get_nodes_with_files_by_ids(db.conn(), &all_node_ids)?;
-            nodes.into_iter().map(|nwf| (nwf.node.id, (nwf.node, nwf.language))).collect()
-        };
-
-        // Phase 3a: Build all context strings (CPU-bound, parallelized with rayon)
-        // Flatten to (node_id, node_name, file_path) tuples for parallel iteration
-        let node_tasks: Vec<(i64, &str, &str)> = all_indexed.iter()
-            .flat_map(|fi| {
-                fi.node_ids.iter().enumerate().map(move |(idx, &node_id)| {
-                    (node_id, fi.node_names[idx].as_str(), fi.rel_path.as_str())
-                })
-            })
-            .collect();
-
-        let context_updates: Vec<(i64, String)> = node_tasks.par_iter()
-            .map(|&(node_id, node_name, file_path)| {
-                let edges = all_edges.get(&node_id);
-                let cat = categorize_edges(edges, format_route_from_metadata);
-                let node_detail = all_node_details.get(&node_id);
-
-                let ctx = build_context_string(&NodeContext {
-                    node_type: node_detail.map(|(n, _)| n.node_type.clone()).unwrap_or_default(),
-                    name: node_name.to_string(),
-                    qualified_name: node_detail.and_then(|(n, _)| n.qualified_name.clone()),
-                    file_path: file_path.to_string(),
-                    language: node_detail.and_then(|(_, lang)| lang.clone()),
-                    signature: node_detail.and_then(|(n, _)| n.signature.clone()),
-                    return_type: node_detail.and_then(|(n, _)| n.return_type.clone()),
-                    param_types: node_detail.and_then(|(n, _)| n.param_types.clone()),
-                    code_content: node_detail.map(|(n, _)| n.code_content.clone()),
-                    routes: cat.routes,
-                    callees: cat.callees,
-                    callers: cat.callers,
-                    inherits: cat.inherits,
-                    imports: cat.imports,
-                    implements: cat.implements,
-                    exports: cat.exports,
-                    doc_comment: node_detail.and_then(|(n, _)| n.doc_comment.clone()),
-                });
-
-                (node_id, ctx)
-            })
-            .collect();
-
-        // Phase 3b: Batch update context strings in DB
-        update_context_strings_batch(db.conn(), &context_updates)?;
-        tx.commit()?;
-
-        tracing::info!(
-            "[index] Phase 3: context strings built for {} nodes",
-            all_node_ids.len()
-        );
-
-        // Phase 3c: Embed outside the committed tx — recoverable on failure via repair_null_context_strings
-        if let Some(m) = model {
-            if db.vec_enabled() {
-                embed_and_store_batch(db, m, &context_updates)?;
-            }
-        }
-    }
-
-    // Phase 2c: sweep pending_unresolved_calls — promote any rows whose
-    // target_name now resolves against a same-language node. Cheap when the
-    // table is empty (typical after a full index of a self-contained codebase).
-    let pending_resolved = resolve_pending_calls(db)?;
-    total_edges_created += pending_resolved;
-    if pending_resolved > 0 {
-        tracing::info!(
-            "[index] Phase 2c: resolved {} pending unresolved calls",
-            pending_resolved
-        );
-    }
-
-    // Optimize query planner statistics after bulk writes
-    if !all_indexed.is_empty() {
-        let _ = db.run_optimize();
-    }
-
-    let stats = IndexStats {
-        files_skipped_size: skipped_size.load(AtomicOrdering::Relaxed),
-        files_skipped_parse: skipped_parse.load(AtomicOrdering::Relaxed),
-        files_skipped_read: skipped_read.load(AtomicOrdering::Relaxed),
-        files_skipped_hash: skipped_hash.load(AtomicOrdering::Relaxed),
-        files_skipped_language: skipped_language.load(AtomicOrdering::Relaxed),
-    };
-
-    Ok(IndexResult {
-        files_indexed: all_indexed.len(),
-        nodes_created: total_nodes_created,
-        edges_created: total_edges_created,
-        stats,
-    })
-}
-
-/// Sweep `pending_unresolved_calls` against the current node state. Rows whose
-/// `(target_name, source_language)` now match a real node become a `calls`
-/// edge and the pending row is dropped; rows that still don't resolve stay
-/// buffered for the next index pass.
-///
-/// Resolution priority mirrors Phase 2: same-language candidates only (no
-/// cross-language promotion — memory `feedback_edge_resolution_same_language.md`
-/// flags that as the canonical false-positive class), with
-/// `refine_ambiguous_targets` applied when multiple candidates share the name.
-///
-/// Returns the number of edges inserted by this sweep.
-fn resolve_pending_calls(db: &Database) -> Result<usize> {
-    let pending = crate::storage::queries::list_pending_unresolved_calls(db.conn())?;
-    if pending.is_empty() {
-        return Ok(0);
-    }
-
-    // Build name → [(node_id, language)] map ONCE, then iterate pending rows
-    // in memory. Narrowed by `n.name IN (SELECT DISTINCT target_name ...)` so
-    // even a 1-row pending table doesn't trigger a full nodes-table scan on
-    // every incremental pass — for a 100K-node project the unfiltered SELECT
-    // was 100K rows × every index call, even with no work to do.
-    use crate::storage::queries::{insert_edge_cached, delete_pending_unresolved_call};
-    let mut name_to_lang_targets: HashMap<String, Vec<(i64, String)>> = HashMap::new();
-    let mut node_id_to_path: HashMap<i64, String> = HashMap::new();
-    {
-        let mut stmt = db.conn().prepare(
-            "SELECT n.id, n.name, COALESCE(f.language, ''), f.path
-             FROM nodes n JOIN files f ON f.id = n.file_id
-             WHERE f.language IS NOT NULL
-               AND n.name IN (SELECT DISTINCT target_name FROM pending_unresolved_calls)"
-        )?;
-        let rows = stmt.query_map([], |row| {
-            Ok((
-                row.get::<_, i64>(0)?,
-                row.get::<_, String>(1)?,
-                row.get::<_, String>(2)?,
-                row.get::<_, String>(3)?,
-            ))
-        })?;
-        for row in rows {
-            let (id, name, lang, path) = row?;
-            if lang.is_empty() {
-                continue;
-            }
-            name_to_lang_targets.entry(name).or_default().push((id, lang));
-            node_id_to_path.insert(id, path);
-        }
-    }
-
-    // Map source_id → source file path so refine_ambiguous_targets gets the
-    // proximity hint it needs.
-    let source_ids: Vec<i64> = pending.iter().map(|p| p.source_id).collect();
-    let mut source_id_to_path: HashMap<i64, String> = HashMap::new();
-    if !source_ids.is_empty() {
-        let placeholders = std::iter::repeat_n("?", source_ids.len()).collect::<Vec<_>>().join(",");
-        let sql = format!(
-            "SELECT n.id, f.path FROM nodes n JOIN files f ON f.id = n.file_id WHERE n.id IN ({})",
-            placeholders
-        );
-        let mut stmt = db.conn().prepare(&sql)?;
-        let params: Vec<&dyn rusqlite::ToSql> = source_ids.iter()
-            .map(|id| id as &dyn rusqlite::ToSql)
-            .collect();
-        let rows = stmt.query_map(params.as_slice(), |row| {
-            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
-        })?;
-        for row in rows {
-            let (id, path) = row?;
-            source_id_to_path.insert(id, path);
-        }
-    }
-
-    let mut edges_added = 0usize;
-    let mut to_delete: Vec<i64> = Vec::new();
-
-    for row in &pending {
-        let candidates: Vec<i64> = name_to_lang_targets.get(&row.target_name)
-            .map(|entries| entries.iter()
-                .filter(|(_, lang)| *lang == row.source_language)
-                .map(|(id, _)| *id)
-                .filter(|id| *id != row.source_id) // self-call guard
-                .collect())
-            .unwrap_or_default();
-
-        if candidates.is_empty() {
-            continue; // still unresolvable — leave buffered
-        }
-
-        let refined = if candidates.len() > 1 {
-            let source_path = source_id_to_path.get(&row.source_id).cloned().unwrap_or_default();
-            refine_ambiguous_targets(&candidates, &source_path, &node_id_to_path)
-        } else {
-            candidates
-        };
-
-        for tgt_id in &refined {
-            if insert_edge_cached(
-                db.conn(),
-                row.source_id,
-                *tgt_id,
-                REL_CALLS,
-                row.metadata.as_deref(),
-            )? {
-                edges_added += 1;
-            }
-        }
-        to_delete.push(row.id);
-    }
-
-    for id in to_delete {
-        delete_pending_unresolved_call(db.conn(), id)?;
-    }
-
-    Ok(edges_added)
-}
-
-/// Disambiguate N same-language cross-file candidates for a single call/import
-/// target. Returns a subset of `candidates` in input order; the result is
-/// empty only when `candidates` itself is empty.
-///
-/// Heuristic:
-/// 1. When the caller is not itself a test file, discard test-file
-///    candidates — unless every candidate is a test file, in which case all
-///    of them are kept.
-/// 2. Among that pool, keep only the candidates tied for the longest
-///    byte-wise common path prefix with the caller. Byte-wise prefix is a
-///    rough proxy for module locality — e.g.
-///    `claude-plugin/scripts/session-init.js` shares 22 bytes with
-///    `claude-plugin/scripts/lifecycle.js` but 0 bytes with `scripts/*`.
-///
-/// A single-element result is the authoritative winner. When several
-/// candidates stay tied, all tied candidates are returned instead of being
-/// dropped. Previous versions dropped on ambiguity, which regressed dead-code
-/// detection for bare-name Rust calls like `crate::domain::foo()` where
-/// scoped_identifier extraction keeps only `foo` and two `foo` definitions
-/// under `src/` tie on prefix — better to keep both edges than to report
-/// `foo` as dead.
-///
-/// Candidates missing from `node_id_to_path` are scored as if their path were
-/// empty: not a test file, zero shared prefix.
-fn refine_ambiguous_targets(
-    candidates: &[i64],
-    caller_rel_path: &str,
-    node_id_to_path: &HashMap<i64, String>,
-) -> Vec<i64> {
-    if candidates.len() <= 1 {
-        return candidates.to_vec();
-    }
-
-    let is_test_path = |p: &str| {
-        p.contains(".test.") || p.contains("_test.")
-            || p.starts_with("tests/") || p.contains("/tests/")
-            || p.starts_with("test/") || p.contains("/test/")
-            || p.contains(".spec.")
-    };
-    let path_of = |id: i64| node_id_to_path.get(&id).map(String::as_str).unwrap_or("");
-
-    // Pass 1: prefer non-test candidates when the caller is non-test code.
-    let mut pool: Vec<i64> = candidates.to_vec();
-    if !is_test_path(caller_rel_path) {
-        let non_test: Vec<i64> = candidates.iter().copied()
-            .filter(|&id| !is_test_path(path_of(id)))
-            .collect();
-        if !non_test.is_empty() {
-            pool = non_test;
-        }
-    }
-
-    if pool.len() == 1 { return pool; }
-
-    // Pass 2: score every remaining candidate once by the number of leading
-    // bytes its path shares with the caller's path.
-    let scored: Vec<(i64, usize)> = pool.iter()
-        .map(|&id| {
-            let shared = caller_rel_path.bytes().zip(path_of(id).bytes())
-                .take_while(|(a, b)| a == b)
-                .count();
-            (id, shared)
-        })
-        .collect();
-    let max_prefix = scored.iter().map(|&(_, shared)| shared).max().unwrap_or(0);
-
-    // Keep everything tied for the maximum. At least one candidate attains
-    // it, so the result is non-empty; a multi-element result is deliberate
-    // fan-out for edges we cannot confidently prune (most notably Rust
-    // bare-name scoped calls).
-    scored.into_iter()
-        .filter(|&(_, shared)| shared == max_prefix)
-        .map(|(id, _)| id)
-        .collect()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::storage::queries::{get_nodes_by_name, get_edges_from, get_import_tree};
-    use tempfile::TempDir;
-    use std::fs;
-
-    /// End-to-end smoke test: a full index over one TypeScript file must report
-    /// indexed files, nodes and edges, produce a `calls` edge out of
-    /// `handleLogin`, and leave that node with a context string.
-    #[test]
-    fn test_full_index_pipeline() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let root = project.path();
-
-        let auth_source = r#"
-function validateToken(token: string): boolean {
-    return jwt.verify(token);
-}
-
-function handleLogin(req: Request) {
-    if (validateToken(req.token)) {
-        return createSession(req.userId);
-    }
-}
-"#;
-        fs::create_dir_all(root.join("src")).unwrap();
-        fs::write(root.join("src/auth.ts"), auth_source).unwrap();
-
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let outcome = run_full_index(&db, root, None, None).unwrap();
-
-        assert!(outcome.files_indexed > 0);
-        assert!(outcome.nodes_created > 0);
-        assert!(outcome.edges_created > 0);
-
-        // The node must be stored exactly once.
-        let login_nodes = get_nodes_by_name(db.conn(), "handleLogin").unwrap();
-        assert_eq!(login_nodes.len(), 1);
-        let login = &login_nodes[0];
-
-        // handleLogin must have at least one outgoing `calls` edge.
-        let outgoing = get_edges_from(db.conn(), login.id).unwrap();
-        let has_call_edge = outgoing.iter().any(|e| e.relation == REL_CALLS);
-        assert!(has_call_edge, "should have call edges");
-
-        // The context string must have been populated.
-        assert!(login.context_string.is_some(), "context string should be set after Phase 3");
-    }
-
-    /// Regression guard: a Rust method call such as `hasher.update(...)` used to
-    /// resolve to a JS `function update()` through global bare-name lookup,
-    /// yielding phantom Rust → JS call edges in mixed projects. No `calls` edge
-    /// from the Rust caller may land in a `.js` file, while the JS caller must
-    /// still get at least one `calls` edge.
-    #[test]
-    fn test_cross_language_bare_name_call_resolution() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let root = project.path();
-        for dir in ["src", "scripts"] {
-            fs::create_dir_all(root.join(dir)).unwrap();
-        }
-
-        fs::write(root.join("src/hasher.rs"), r#"
-pub fn caller_rs() {
-    let mut h = Hasher::new();
-    h.update(&[1, 2, 3]);
-    h.finalize();
-}
-"#).unwrap();
-
-        fs::write(root.join("scripts/helper.js"), r#"
-function update() { return 1; }
-function caller_js() { update(); }
-"#).unwrap();
-
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        run_full_index(&db, root, None, None).unwrap();
-
-        // Rust side: none of the call edges may point into a .js file.
-        let rs_matches = crate::storage::queries::get_nodes_with_files_by_name(
-            db.conn(), "caller_rs",
-        ).unwrap();
-        let rs_caller = rs_matches.iter()
-            .find(|n| n.file_path == "src/hasher.rs")
-            .expect("Rust caller_rs should be indexed");
-        let rs_edges = get_edges_from(db.conn(), rs_caller.node.id).unwrap();
-        for edge in rs_edges.iter().filter(|e| e.relation == REL_CALLS) {
-            let tgt_path: Option<String> = db.conn().query_row(
-                "SELECT f.path FROM nodes n JOIN files f ON n.file_id = f.id WHERE n.id = ?1",
-                [edge.target_id], |row| row.get(0),
-            ).ok();
-            let lands_in_js = tgt_path.as_deref().unwrap_or("").ends_with(".js");
-            assert!(
-                !lands_in_js,
-                "Rust caller must not resolve calls into JS; got edge → {:?}", tgt_path,
-            );
-        }
-
-        // JS side: the caller must still have a resolved call edge.
-        let js_matches = crate::storage::queries::get_nodes_with_files_by_name(
-            db.conn(), "caller_js",
-        ).unwrap();
-        let js_caller = js_matches.iter()
-            .find(|n| n.file_path == "scripts/helper.js")
-            .expect("JS caller_js should be indexed");
-        let js_edges = get_edges_from(db.conn(), js_caller.node.id).unwrap();
-        let resolved_js_calls = js_edges.iter()
-            .filter(|e| e.relation == REL_CALLS)
-            .count();
-        assert!(resolved_js_calls > 0,
-            "JS caller_js → update edge within same file should still resolve");
-    }
-
-    /// Each `require(...)` in `app.js` — built-in modules and the relative
-    /// `./lifecycle` alike — must surface as an `imports` edge whose target
-    /// node carries the module name.
-    #[test]
-    fn test_js_require_creates_external_import_edges() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        fs::write(project.path().join("app.js"), r#"
-const fs = require('fs');
-const path = require('path');
-const lifecycle = require('./lifecycle');
-
-function main() { fs.readFileSync('x'); }
-"#).unwrap();
-
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-
-        // Names of every node that app.js reaches through an `imports` edge.
-        let mut stmt = db.conn().prepare(
-            "SELECT DISTINCT n2.name FROM edges e
-             JOIN nodes n ON n.id = e.source_id
-             JOIN files f ON f.id = n.file_id
-             JOIN nodes n2 ON n2.id = e.target_id
-             WHERE f.path = 'app.js' AND e.relation = 'imports'"
-        ).unwrap();
-        let imports: Vec<String> = stmt
-            .query_map([], |row| row.get::<_, String>(0)).unwrap()
-            .filter_map(Result::ok)
-            .collect();
-
-        for expected in ["fs", "path", "lifecycle"] {
-            assert!(imports.contains(&expected.to_string()), "imports: {:?}", imports);
-        }
-    }
-
-    /// Regression guard: when the same JS helper name is defined in several
-    /// files (e.g. `readJson` in both `claude-plugin/scripts/lifecycle.js` and
-    /// `scripts/install-e2e.test.js`), a caller used to receive an edge to every
-    /// same-language match, creating false-positive callers across unrelated
-    /// modules. The resolver has to favour the candidate with the longest
-    /// common path prefix to the caller file (and non-test files) instead.
-    #[test]
-    fn test_js_same_name_cross_file_prefers_closest_path() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let root = project.path();
-        for dir in ["pkg/scripts", "tests"] {
-            fs::create_dir_all(root.join(dir)).unwrap();
-        }
-
-        fs::write(root.join("pkg/scripts/lifecycle.js"), r#"
-function readJson(p) { return 1; }
-module.exports = { readJson };
-"#).unwrap();
-
-        fs::write(root.join("pkg/scripts/session-init.js"), r#"
-function syncLifecycleConfig() { readJson('x'); }
-"#).unwrap();
-
-        fs::write(root.join("tests/helpers.test.js"), r#"
-function readJson(p) { return 2; }
-"#).unwrap();
-
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        run_full_index(&db, root, None, None).unwrap();
-
-        // Locate the calling function.
-        let caller_matches = crate::storage::queries::get_nodes_with_files_by_name(
-            db.conn(), "syncLifecycleConfig",
-        ).unwrap();
-        let caller = caller_matches.iter()
-            .find(|n| n.file_path == "pkg/scripts/session-init.js")
-            .expect("syncLifecycleConfig should be indexed");
-
-        // File path of every `calls` target; targets whose lookup fails are skipped.
-        let outgoing = get_edges_from(db.conn(), caller.node.id).unwrap();
-        let target_paths: Vec<String> = outgoing.iter()
-            .filter(|e| e.relation == REL_CALLS)
-            .filter_map(|e| {
-                db.conn().query_row(
-                    "SELECT f.path FROM nodes n JOIN files f ON n.file_id = f.id WHERE n.id = ?1",
-                    [e.target_id], |row| row.get(0)
-                ).ok()
-            })
-            .collect();
-
-        // The same-directory definition must be hit; the test-file one must not.
-        let hits = |wanted: &str| target_paths.iter().any(|p| p == wanted);
-        assert!(
-            hits("pkg/scripts/lifecycle.js"),
-            "should resolve to same-dir readJson; got {:?}", target_paths
-        );
-        assert!(
-            !hits("tests/helpers.test.js"),
-            "should NOT fan out to unrelated test-file readJson; got {:?}", target_paths
-        );
-    }
-
-    /// Regression guard: helpers in a JS test file that are invoked only from
-    /// inside `test(() => {...})` / `describe(() => {...})` callbacks used to be
-    /// flagged as orphans by dead-code, because calls in the anonymous arrow
-    /// body were attributed to `<anonymous>`, which resolves to no node. Each
-    /// helper must end up with at least one incoming `calls` edge.
-    #[test]
-    fn test_js_module_level_test_callback_calls_resolve() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-
-        fs::write(project.path().join("helpers.test.js"), r#"
-function mkHome() { return '/tmp/x'; }
-function writeJson(p, v) { }
-
-test('uses helpers', () => {
-    const h = mkHome();
-    writeJson(h, { a: 1 });
-});
-"#).unwrap();
-
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-
-        // Counts `calls` edges whose target is the named helper in helpers.test.js.
-        let incoming_calls = |helper: &str| -> i64 {
-            db.conn().query_row(
-                "SELECT COUNT(*) FROM edges e
-                 JOIN nodes tn ON tn.id = e.target_id
-                 JOIN files tf ON tf.id = tn.file_id
-                 WHERE tn.name = ?1 AND tf.path = 'helpers.test.js' AND e.relation = 'calls'",
-                [helper], |row| row.get(0),
-            ).unwrap()
-        };
-
-        for helper in ["mkHome", "writeJson"] {
-            let cnt = incoming_calls(helper);
-            assert!(cnt >= 1,
-                "{} should have at least one incoming call edge from the test callback, got {}",
-                helper, cnt);
-        }
-    }
-
-    /// After a file is rewritten, an incremental run must re-index exactly that
-    /// one file, evicting the old symbol and picking up the new one.
-    #[test]
-    fn test_incremental_index() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let source_file = project.path().join("a.ts");
-
-        // Baseline: the file defines `foo`.
-        fs::write(&source_file, "function foo() {}").unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-
-        // Swap `foo` for `bar` on disk, then index incrementally.
-        fs::write(&source_file, "function bar() {}").unwrap();
-        let outcome = run_incremental_index(&db, project.path(), None, None).unwrap();
-        assert_eq!(outcome.files_indexed, 1);
-
-        let count_named = |name: &str| get_nodes_by_name(db.conn(), name).unwrap().len();
-        assert_eq!(count_named("foo"), 0);
-        assert_eq!(count_named("bar"), 1);
-    }
-
-    /// Renaming a callee in one file must change the context string of its
-    /// caller in another file after an incremental run.
-    #[test]
-    fn test_incremental_propagates_dirty_context() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let root = project.path();
-
-        // Reads beta's context string (empty if unset); beta must exist exactly once.
-        let beta_context = || {
-            let found = get_nodes_by_name(db.conn(), "beta").unwrap();
-            assert_eq!(found.len(), 1);
-            found[0].context_string.clone().unwrap_or_default()
-        };
-
-        // Starting point: beta (b.ts) calls alpha (a.ts).
-        fs::write(root.join("a.ts"), "function alpha() {}").unwrap();
-        fs::write(root.join("b.ts"), "function beta() { alpha(); }").unwrap();
-        run_full_index(&db, root, None, None).unwrap();
-        let ctx_before = beta_context();
-
-        // Rename the callee: alpha -> alphaRenamed.
-        fs::write(root.join("a.ts"), "function alphaRenamed() {}").unwrap();
-        run_incremental_index(&db, root, None, None).unwrap();
-
-        // beta's context must differ now that the old alpha node is gone.
-        let ctx_after = beta_context();
-        assert_ne!(ctx_before, ctx_after);
-    }
-
-    /// Nodes of a file removed from disk must vanish after an incremental run.
-    #[test]
-    fn test_deleted_file_cleanup() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let doomed = project.path().join("a.ts");
-
-        fs::write(&doomed, "function foo() {}").unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-
-        fs::remove_file(&doomed).unwrap();
-        run_incremental_index(&db, project.path(), None, None).unwrap();
-
-        let leftovers = get_nodes_by_name(db.conn(), "foo").unwrap();
-        assert_eq!(leftovers.len(), 0);
-    }
-
-    /// The module map must expose full dotted paths, bare suffixes, packages
-    /// (via `__init__.py`) and modules nested under a `src/` prefix.
-    #[test]
-    fn test_build_python_module_map() {
-        let mut paths = HashSet::new();
-        for file in ["myapp/utils.py", "myapp/__init__.py", "src/myapp/models.py"] {
-            paths.insert(file.into());
-        }
-
-        let map = build_python_module_map(&paths);
-
-        let expectations = [
-            ("myapp.utils", "myapp/utils.py"),        // full dotted path
-            ("utils", "myapp/utils.py"),              // suffix path
-            ("myapp", "myapp/__init__.py"),           // __init__.py maps to package
-            ("myapp.models", "src/myapp/models.py"),  // nested with src/ prefix
-        ];
-        for (module, file) in expectations {
-            assert!(map.get(module).unwrap().contains(&file.to_string()));
-        }
-    }
-
-    /// `from myapp.utils import helper` must produce a cross-file dependency
-    /// from `myapp/main.py` to `myapp/utils.py`.
-    #[test]
-    fn test_python_from_import_resolution() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let package = project.path().join("myapp");
-
-        fs::create_dir_all(&package).unwrap();
-        fs::write(package.join("utils.py"), "def helper():\n    return 42\n").unwrap();
-        fs::write(
-            package.join("main.py"),
-            "from myapp.utils import helper\n\ndef main():\n    helper()\n",
-        ).unwrap();
-
-        let outcome = run_full_index(&db, project.path(), None, None).unwrap();
-        assert!(outcome.edges_created > 0, "should create import edges");
-
-        // main.py -> utils.py must appear among the outgoing dependencies.
-        let deps = get_import_tree(db.conn(), "myapp/main.py", "outgoing", 1).unwrap();
-        let reaches_utils = deps.iter().any(|d| d.file_path == "myapp/utils.py");
-        assert!(
-            reaches_utils,
-            "main.py should depend on utils.py, got: {:?}",
-            deps.iter().map(|d| &d.file_path).collect::<Vec<_>>()
-        );
-    }
-
-    /// A plain `import myutils` must produce a cross-file dependency from
-    /// `main.py` to `myutils.py`.
-    #[test]
-    fn test_python_import_module_resolution() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let root = project.path();
-
-        fs::write(root.join("myutils.py"), "def do_something():\n    pass\n").unwrap();
-        fs::write(
-            root.join("main.py"),
-            "import myutils\n\ndef main():\n    myutils.do_something()\n",
-        ).unwrap();
-
-        let outcome = run_full_index(&db, root, None, None).unwrap();
-        assert!(outcome.edges_created > 0, "should create import edges");
-
-        // main.py -> myutils.py must appear among the outgoing dependencies.
-        let deps = get_import_tree(db.conn(), "main.py", "outgoing", 1).unwrap();
-        let reaches_myutils = deps.iter().any(|d| d.file_path == "myutils.py");
-        assert!(
-            reaches_myutils,
-            "main.py should depend on myutils.py, got: {:?}",
-            deps.iter().map(|d| &d.file_path).collect::<Vec<_>>()
-        );
-    }
-
-    /// Imports that resolve to no project file must be recorded as virtual
-    /// nodes under the `<external>` pseudo-file, and the importing file must
-    /// list `<external>` as a dependency.
-    #[test]
-    fn test_python_external_import_creates_virtual_nodes() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-
-        fs::write(
-            project.path().join("app.py"),
-            "import os\nfrom collections import OrderedDict\nfrom flask import Flask\n\ndef main():\n    pass\n",
-        ).unwrap();
-
-        let outcome = run_full_index(&db, project.path(), None, None).unwrap();
-        assert!(outcome.files_indexed > 0, "should index the file");
-
-        // One virtual node per imported top-level module.
-        let ext_nodes = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
-        let ext_names: Vec<&str> = ext_nodes.iter().map(|n| n.name.as_str()).collect();
-        for module in ["os", "collections", "flask"] {
-            assert!(ext_names.contains(&module), "should have virtual node for '{}', got: {:?}", module, ext_names);
-        }
-
-        // The import tree of app.py must mention <external>.
-        let deps = get_import_tree(db.conn(), "app.py", "outgoing", 1).unwrap();
-        let reaches_external = deps.iter().any(|d| d.file_path == "<external>");
-        assert!(
-            reaches_external,
-            "app.py should show <external> dependency, got: {:?}",
-            deps.iter().map(|d| &d.file_path).collect::<Vec<_>>()
-        );
-    }
-
-    /// A file importing both a project module and third-party/stdlib modules
-    /// must depend on the internal file as well as on `<external>`.
-    #[test]
-    fn test_python_mixed_internal_external_imports() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let package = project.path().join("myapp");
-
-        fs::create_dir_all(&package).unwrap();
-        fs::write(package.join("utils.py"), "def helper():\n    return 42\n").unwrap();
-        fs::write(
-            package.join("main.py"),
-            "import os\nfrom myapp.utils import helper\nfrom flask import Flask\n\ndef main():\n    helper()\n",
-        ).unwrap();
-
-        let outcome = run_full_index(&db, project.path(), None, None).unwrap();
-        assert!(outcome.edges_created > 0);
-
-        let deps = get_import_tree(db.conn(), "myapp/main.py", "outgoing", 1).unwrap();
-        let dep_files: Vec<&str> = deps.iter().map(|d| d.file_path.as_str()).collect();
-
-        // Internal dependency.
-        let has_internal = dep_files.contains(&"myapp/utils.py");
-        assert!(has_internal, "should depend on internal utils.py, got: {:?}", dep_files);
-
-        // External dependency.
-        let has_external = dep_files.contains(&"<external>");
-        assert!(has_external, "should depend on <external>, got: {:?}", dep_files);
-    }
-
-    /// `IndexResult.stats` must count a file that is skipped for being too
-    /// large, while the normal-sized file next to it is still indexed.
-    #[test]
-    fn test_index_stats_skipped_large_file() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let root = project.path();
-
-        // Ordinary file that should be indexed.
-        fs::write(root.join("small.ts"), "function ok() {}").unwrap();
-
-        // 11 MiB of filler, meant to exceed MAX_FILE_SIZE (10MB).
-        let oversized = "a".repeat(11 * 1024 * 1024);
-        fs::write(root.join("huge.ts"), &oversized).unwrap();
-
-        let outcome = run_full_index(&db, root, None, None).unwrap();
-        assert_eq!(outcome.files_indexed, 1, "should index the small file");
-        assert_eq!(outcome.stats.files_skipped_size, 1, "should track the large file skip");
-    }
-
-    /// With only a single well-formed file in the project, the parse/read/hash
-    /// skip counters in `IndexResult.stats` must all stay at zero.
-    #[test]
-    fn test_index_stats_skipped_parse_error() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-
-        // The only input is valid TypeScript; no failing file is created here,
-        // so this test checks the zero baseline rather than an actual skip.
-        fs::write(project.path().join("good.ts"), "function ok() {}").unwrap();
-
-        let outcome = run_full_index(&db, project.path(), None, None).unwrap();
-        let stats = &outcome.stats;
-        assert_eq!(stats.files_skipped_parse, 0);
-        assert_eq!(stats.files_skipped_read, 0);
-        assert_eq!(stats.files_skipped_hash, 0);
-    }
-
-    /// `IndexStats::default()` must start every skip counter at zero.
-    #[test]
-    fn test_index_stats_default() {
-        let fresh = IndexStats::default();
-
-        assert_eq!(fresh.files_skipped_size, 0);
-        assert_eq!(fresh.files_skipped_parse, 0);
-        assert_eq!(fresh.files_skipped_read, 0);
-        assert_eq!(fresh.files_skipped_hash, 0);
-        assert_eq!(fresh.files_skipped_language, 0);
-    }
-
-    /// The `<external>` pseudo-file and the dependency on it must still be
-    /// present after an incremental re-index of the importing file.
-    #[test]
-    fn test_python_external_survives_incremental_index() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let app = project.path().join("app.py");
-
-        // Full index: `import os` should yield nodes under <external>.
-        fs::write(&app, "import os\n\ndef main():\n    pass\n").unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-        let ext_before = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
-        assert!(!ext_before.is_empty(), "should have external nodes after full index");
-
-        // Small body edit, same import; then an incremental pass.
-        fs::write(&app, "import os\n\ndef main():\n    return 1\n").unwrap();
-        run_incremental_index(&db, project.path(), None, None).unwrap();
-        let ext_after = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
-        assert!(!ext_after.is_empty(), "external nodes should survive incremental index");
-
-        // The dependency must still be reported.
-        let deps = get_import_tree(db.conn(), "app.py", "outgoing", 1).unwrap();
-        let reaches_external = deps.iter().any(|d| d.file_path == "<external>");
-        assert!(
-            reaches_external,
-            "app.py should still show <external> dependency after incremental index"
-        );
-    }
-
-    /// `repair_null_context_strings` must refill context strings that were
-    /// wiped to NULL after a successful index.
-    #[test]
-    fn test_repair_null_context_strings() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-        let lookup = |name: &str| get_nodes_by_name(db.conn(), name).unwrap();
-
-        // Index two functions so both receive a context string.
-        fs::write(project.path().join("a.ts"), r#"
-function alpha() { return 1; }
-function beta() { alpha(); }
-"#).unwrap();
-        run_full_index(&db, project.path(), None, None).unwrap();
-
-        // Both nodes exist once and carry a context string.
-        let alpha_indexed = lookup("alpha");
-        assert_eq!(alpha_indexed.len(), 1);
-        assert!(alpha_indexed[0].context_string.is_some(), "alpha should have context_string after index");
-
-        let beta_indexed = lookup("beta");
-        assert_eq!(beta_indexed.len(), 1);
-        assert!(beta_indexed[0].context_string.is_some(), "beta should have context_string after index");
-
-        // Emulate a Phase 3 failure by blanking every context string.
-        db.conn().execute("UPDATE nodes SET context_string = NULL", []).unwrap();
-        let alpha_wiped = lookup("alpha");
-        assert!(alpha_wiped[0].context_string.is_none(), "alpha context_string should be NULL after simulated failure");
-
-        // Repair must touch at least one node ...
-        let repaired = repair_null_context_strings(&db, None).unwrap();
-        assert!(repaired > 0, "should repair at least 1 node");
-
-        // ... and bring both context strings back.
-        let alpha_fixed = lookup("alpha");
-        assert!(alpha_fixed[0].context_string.is_some(), "alpha should have context_string after repair");
-
-        let beta_fixed = lookup("beta");
-        assert!(beta_fixed[0].context_string.is_some(), "beta should have context_string after repair");
-    }
-
-    /// Implementing traits that are not defined in the project (`Write`,
-    /// `fmt::Display`) must create `trait` sentinel nodes under `<external>`
-    /// and `implements` edges from the implementing struct to them.
-    #[test]
-    fn test_rust_implements_creates_sentinel_for_external_trait() {
-        let project = TempDir::new().unwrap();
-        let db_home = TempDir::new().unwrap();
-        let db = Database::open(&db_home.path().join("index.db")).unwrap();
-
-        fs::write(project.path().join("main.rs"), r#"
-use std::io::{self, Write};
-use std::fmt;
-
-struct MyWriter;
-
-impl Write for MyWriter {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> { Ok(buf.len()) }
-    fn flush(&mut self) -> io::Result<()> { Ok(()) }
-}
-
-impl fmt::Display for MyWriter {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "MyWriter")
-    }
-}
-"#).unwrap();
-
-        let outcome = run_full_index(&db, project.path(), None, None).unwrap();
-        assert!(outcome.files_indexed > 0);
-
-        // Sentinel nodes for the external traits.
-        let ext_nodes = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
-        let ext_names: Vec<&str> = ext_nodes.iter().map(|n| n.name.as_str()).collect();
-        assert!(ext_names.contains(&"Write"), "should have sentinel for Write, got: {:?}", ext_names);
-        // The qualified name is expected verbatim, path prefix included.
-        assert!(ext_names.contains(&"fmt::Display"), "should have sentinel for fmt::Display, got: {:?}", ext_names);
-
-        // The sentinel must be typed as a trait.
-        let write_node = ext_nodes.iter().find(|n| n.name == "Write").unwrap();
-        assert_eq!(write_node.node_type, "trait", "sentinel should be type 'trait'");
-
-        // Every (source name, target name) pair joined by an `implements` edge.
-        let mut stmt = db.conn().prepare(
-            "SELECT ns.name, nt.name FROM edges e
-             JOIN nodes ns ON ns.id = e.source_id
-             JOIN nodes nt ON nt.id = e.target_id
-             WHERE e.relation = 'implements'"
-        ).unwrap();
-        let edges: Vec<(String, String)> = stmt
-            .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
-            .unwrap()
-            .collect::<Result<Vec<_>, _>>().unwrap();
-
-        let has_edge = |src: &str, dst: &str| edges.iter().any(|(s, t)| s == src && t == dst);
-        assert!(has_edge("MyWriter", "Write"),
-            "should have MyWriter→Write implements edge, got: {:?}", edges);
-        assert!(has_edge("MyWriter", "fmt::Display"),
-            "should have MyWriter→fmt::Display implements edge, got: {:?}", edges);
-    }
-
-    /// ensure_file_indexed must (a) be a no-op when on-disk hash matches the
-    /// stored hash, and (b) actually pick up post-edit content when it doesn't.
-    /// This is the contract the MCP `ensure_file_fresh_opt` wrapper relies on
-    /// to close the post-Edit→pre-incremental-index window.
-    #[test]
-    fn test_ensure_file_indexed_picks_up_post_edit_changes() {
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // Initial state: file with `alpha`
-        fs::write(project_dir.path().join("a.ts"), "function alpha() {}\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-        let names_before: Vec<String> = get_nodes_by_name(db.conn(), "alpha")
-            .unwrap().into_iter().map(|n| n.name).collect();
-        assert_eq!(names_before, vec!["alpha".to_string()]);
-
-        // No-op when hashes match
-        let did = ensure_file_indexed(&db, project_dir.path(), "a.ts", None).unwrap();
-        assert!(!did, "matching hash must be a no-op (got reindex)");
-
-        // Edit on disk; old `alpha` removed, new `beta` added
-        fs::write(project_dir.path().join("a.ts"), "function beta() {}\n").unwrap();
-        let did2 = ensure_file_indexed(&db, project_dir.path(), "a.ts", None).unwrap();
-        assert!(did2, "hash mismatch must trigger a reindex");
-
-        // alpha gone, beta present — post-Edit query would now see fresh state
-        assert!(get_nodes_by_name(db.conn(), "alpha").unwrap().is_empty(),
-            "old alpha must be evicted by single-file reindex");
-        let beta = get_nodes_by_name(db.conn(), "beta").unwrap();
-        assert_eq!(beta.len(), 1, "new beta must appear after single-file reindex");
-        assert_eq!(beta[0].name, "beta");
-
-        // Calling again with no on-disk change is a no-op
-        let did3 = ensure_file_indexed(&db, project_dir.path(), "a.ts", None).unwrap();
-        assert!(!did3, "second call with no edit must no-op");
-
-        // Deleting the file from disk drops the row
-        fs::remove_file(project_dir.path().join("a.ts")).unwrap();
-        let did4 = ensure_file_indexed(&db, project_dir.path(), "a.ts", None).unwrap();
-        assert!(did4, "missing file must trigger row cleanup");
-        assert!(get_nodes_by_name(db.conn(), "beta").unwrap().is_empty(),
-            "beta must be cascade-deleted with its file");
-    }
-
-    /// Root-cause test for `feedback_incremental_edge_timing.md`: file B
-    /// (existing, unchanged) bare-name calls `foo()`. file A is added later
-    /// with `function foo() {}`. Phase 2 of B's first index pass dropped the
-    /// edge because `foo` was unresolvable; before this fix, A's later index
-    /// never re-resolved B's call → permanently missing edge in incremental
-    /// mode (only `rebuild-index` recovered it).
-    ///
-    /// New behavior: B's drop becomes a `pending_unresolved_calls` row; A's
-    /// index pass sweeps pending and promotes the row into a real edge.
-    #[test]
-    fn test_pending_unresolved_call_resolves_when_callee_added_later() {
-        use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
-        use crate::domain::REL_CALLS;
-
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // Step 1: B exists alone with bare-name call to foo (foo undefined).
-        fs::write(project_dir.path().join("b.ts"),
-            "function caller_b() { foo(); }\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-
-        // Phase 2 dropped the edge (no same-file/same-language target) and
-        // buffered the row instead.
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
-            "B's call to undefined foo must land in pending_unresolved_calls");
-
-        let caller_b_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
-            .into_iter().next().expect("caller_b must exist").0;
-
-        // Verify NO edge yet (foo doesn't exist in DB).
-        let pre_edges = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
-        assert!(pre_edges.iter().all(|e| e.relation != REL_CALLS),
-            "no calls edge should exist yet — foo is undefined");
-
-        // Step 2: A is added with foo(). Incremental index picks it up; the
-        // pending sweep at end of index_files promotes B's buffered call into
-        // a real edge.
-        fs::write(project_dir.path().join("a.ts"),
-            "export function foo() {}\n").unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        let foo_id = get_node_ids_by_name(db.conn(), "foo").unwrap()
-            .into_iter().next().expect("foo must exist after A indexed").0;
-
-        let post_edges = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
-        let calls_to_foo: Vec<_> = post_edges.iter()
-            .filter(|e| e.relation == REL_CALLS && e.target_id == foo_id)
-            .collect();
-        assert_eq!(calls_to_foo.len(), 1,
-            "incremental index must promote pending call → calls edge caller_b → foo; \
-             got edges: {:?}", post_edges.iter().map(|e| (&e.relation, e.target_id)).collect::<Vec<_>>());
-
-        // Pending row must be drained after successful resolution.
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
-            "resolved pending row must be deleted after edge insertion");
-    }
-
-    /// Cross-language pending must NOT resolve cross-language. If B (TS)
-    /// calls `update()` and a later-indexed Rust file defines `fn update()`,
-    /// the pending row must stay buffered, not silently bind cross-language
-    /// (memory `feedback_edge_resolution_same_language.md`'s canonical
-    /// false-positive class).
-    #[test]
-    fn test_pending_unresolved_call_does_not_cross_language() {
-        use crate::storage::queries::count_pending_unresolved_calls;
-
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // TS file with bare-name call to `update`
-        fs::write(project_dir.path().join("client.ts"),
-            "function caller_ts() { update(); }\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1);
-
-        // Rust file with `update` — different language, must NOT match.
-        fs::write(project_dir.path().join("hasher.rs"),
-            "fn update() {}\n").unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        // Pending row stays — sweep refused cross-language resolution.
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
-            "cross-language target must NOT resolve a TS pending call to a Rust fn");
-    }
-
-    /// One caller with N undefined references must produce N pending rows;
-    /// when a single later-added file defines all N, all rows must resolve in
-    /// a single sweep. Real codebases hit this whenever a "barrel" or shared
-    /// utility module gets added after its consumers.
-    #[test]
-    fn test_pending_resolves_multiple_calls_in_same_caller() {
-        use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
-
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // B has three undefined call targets — foo, bar, baz.
-        fs::write(project_dir.path().join("b.ts"),
-            "function caller_b() { foo(); bar(); baz(); }\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 3,
-            "three bare-name calls must produce three pending rows");
-
-        // A defines all three.
-        fs::write(project_dir.path().join("a.ts"),
-            "export function foo() {}\nexport function bar() {}\nexport function baz() {}\n").unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
-            "all three pending rows must drain once their targets exist");
-
-        // All three resolved into real edges.
-        let caller_b_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
-            .into_iter().next().unwrap().0;
-        let edges = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
-        let calls_count = edges.iter().filter(|e| e.relation == REL_CALLS).count();
-        assert_eq!(calls_count, 3,
-            "caller_b must have exactly three calls edges (foo, bar, baz); got {} edges total: {:?}",
-            calls_count, edges.iter().map(|e| (&e.relation, e.target_id)).collect::<Vec<_>>());
-    }
-
-    /// When the caller's source file is reindexed (e.g. user edits B), the
-    /// cascade FK on pending_unresolved_calls(source_id) must drop B's pending
-    /// rows so a fresh Phase 2 can re-buffer them with the current source IDs.
-    /// This is the schema's load-bearing self-cleaning property — we test it
-    /// explicitly so a future migration that drops or weakens the FK fails
-    /// loudly here rather than leaking pending rows for ever-removed callers.
-    #[test]
-    fn test_pending_cascade_deletes_when_caller_file_reindexed() {
-        use crate::storage::queries::count_pending_unresolved_calls;
-
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // B with undefined target → pending row created.
-        fs::write(project_dir.path().join("b.ts"),
-            "function caller_b() { undefined_target(); }\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1);
-
-        // Edit B to remove the call entirely. caller_b's old node gets
-        // cascade-deleted on reindex (Phase 1 deletes prior rows), and its
-        // pending row must follow it via ON DELETE CASCADE on source_id.
-        fs::write(project_dir.path().join("b.ts"),
-            "function caller_b() { /* call removed */ }\n").unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
-            "pending row must be cascade-deleted when its source caller is removed/reindexed");
-    }
-
-    /// Inverse-direction symmetry test for `feedback_incremental_edge_timing.md`:
-    /// existing edge B → A.foo gets cascade-deleted when A is removed, and B
-    /// is NOT in changed_paths (deletion doesn't re-extract B). Without Phase 0
-    /// pre-cascade buffering, B has neither edge nor pending row — a permanent
-    /// silent edge loss until full rebuild. The Phase 0 buffer (added by this
-    /// fix) must capture B's call as a pending row before cascade fires.
-    #[test]
-    fn test_pending_buffers_on_callee_file_deletion() {
-        use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
-
-        let project_dir = TempDir::new().unwrap();
-        let db_dir = TempDir::new().unwrap();
-        let db = Database::open(&db_dir.path().join("index.db")).unwrap();
-
-        // Initial: A defines foo, B calls foo — edge B.caller_b → A.foo exists.
-        fs::write(project_dir.path().join("a.ts"),
-            "export function foo() {}\n").unwrap();
-        fs::write(project_dir.path().join("b.ts"),
-            "function caller_b() { foo(); }\n").unwrap();
-        run_full_index(&db, project_dir.path(), None, None).unwrap();
-
-        // No pending rows yet — call resolved at index time.
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
-            "fully-resolvable call must not produce a pending row");
-
-        let caller_b_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
-            .into_iter().next().unwrap().0;
-        let foo_id_pre = get_node_ids_by_name(db.conn(), "foo").unwrap()
-            .into_iter().next().unwrap().0;
-        let edges_pre = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
-        assert!(edges_pre.iter().any(|e| e.relation == REL_CALLS && e.target_id == foo_id_pre),
-            "edge caller_b → foo must exist pre-deletion");
-
-        // Delete A. Phase 0 must buffer B's now-orphaned call into pending
-        // BEFORE cascade strips the edge.
-        fs::remove_file(project_dir.path().join("a.ts")).unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        // foo is gone.
-        assert!(get_node_ids_by_name(db.conn(), "foo").unwrap().is_empty(),
-            "foo must be cascade-deleted with file a.ts");
-
-        // B's edge to old foo is gone, but pending row holds the call.
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
-            "Phase 0 must buffer the orphaned inbound call into pending");
-
-        // Re-add A — pending sweep promotes the buffered call to a fresh edge.
-        fs::write(project_dir.path().join("a.ts"),
-            "export function foo() {}\n").unwrap();
-        run_incremental_index(&db, project_dir.path(), None, None).unwrap();
-
-        assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
-            "pending must drain once foo reappears");
-
-        let foo_id_post = get_node_ids_by_name(db.conn(), "foo").unwrap()
-            .into_iter().next().unwrap().0;
-        let edges_post = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
-        assert!(edges_post.iter().any(|e| e.relation == REL_CALLS && e.target_id == foo_id_post),
-            "edge caller_b → foo must reappear post re-add via pending sweep");
-    }
-}
diff --git a/src/indexer/pipeline/context.rs b/src/indexer/pipeline/context.rs
new file mode 100644
index 0000000..485f4c4
--- /dev/null
+++ b/src/indexer/pipeline/context.rs
@@ -0,0 +1,197 @@
+//! Context-string assembly for a node + edge bundle, plus the two recovery
+//! paths that re-run that assembly outside the main indexer:
+//! - `regenerate_context_strings`: incremental dirty propagation (rebuilds
+//!   ctx for nodes whose cross-file edges flipped during a re-index).
+//! - `repair_null_context_strings`: startup repair when a prior Phase 3
+//!   transaction failed and left rows with NULL context_string.
+//!
+//! `categorize_edges` and `format_route_from_metadata` are also used by the
+//! main `index_files` Phase 3 builder, so they live here as `pub(super)`.
+
+use anyhow::Result;
+use std::collections::{HashMap, HashSet};
+
+use crate::embedding::context::{build_context_string, NodeContext};
+use crate::embedding::model::EmbeddingModel;
+use crate::storage::db::Database;
+use crate::storage::queries::{
+    get_edges_batch, get_nodes_missing_context, get_nodes_with_files_by_ids,
+    update_context_strings_batch, EdgeInfo, NodeResult,
+};
+use crate::domain::{REL_CALLS, REL_IMPORTS, REL_INHERITS, REL_ROUTES_TO, REL_IMPLEMENTS, REL_EXPORTS};
+
+use super::embed::embed_and_store_batch;
+
+/// Render a route edge as `"METHOD path"` using its JSON metadata.
+///
+/// Falls back to `name` when the metadata is absent, is not valid JSON, or has
+/// no string `path`; a missing or non-string `method` is rendered as `ALL`.
+pub(super) fn format_route_from_metadata(metadata: Option<&str>, name: &str) -> String {
+    let parsed = metadata.and_then(|raw| serde_json::from_str::<serde_json::Value>(raw).ok());
+    let route = parsed.as_ref().and_then(|json| {
+        let path = json["path"].as_str()?;
+        Some(format!("{} {}", json["method"].as_str().unwrap_or("ALL"), path))
+    });
+    route.unwrap_or_else(|| name.to_string())
+}
+
+/// Edge names for a single node, bucketed by relation and direction.
+///
+/// Only `callers` holds inbound edges; every other bucket holds outbound ones.
+#[derive(Default)]
+pub(super) struct CategorizedEdges {
+    pub callees: Vec<String>,
+    pub callers: Vec<String>,
+    pub inherits: Vec<String>,
+    pub routes: Vec<String>,
+    pub imports: Vec<String>,
+    pub implements: Vec<String>,
+    pub exports: Vec<String>,
+}
+
+/// Sort a node's edges into [`CategorizedEdges`] buckets.
+///
+/// `edges` is the node's edge list (`None` yields empty buckets). Each entry is
+/// destructured as `(relation, direction, name, metadata)`. Route edges are
+/// rendered through `format_route(metadata, name)`; all other buckets store the
+/// edge name unchanged. Relation/direction pairs without a bucket are skipped.
+pub(super) fn categorize_edges(edges: Option<&Vec<EdgeInfo>>, format_route: impl Fn(Option<&str>, &str) -> String) -> CategorizedEdges {
+    let mut result = CategorizedEdges::default();
+    for (relation, direction, name, metadata) in edges.into_iter().flatten() {
+        match (relation.as_str(), direction.as_str()) {
+            (rel, "out") if rel == REL_CALLS => result.callees.push(name.clone()),
+            (rel, "in") if rel == REL_CALLS => result.callers.push(name.clone()),
+            (rel, "out") if rel == REL_INHERITS => result.inherits.push(name.clone()),
+            (rel, "out") if rel == REL_ROUTES_TO => {
+                result.routes.push(format_route(metadata.as_deref(), name));
+            }
+            (rel, "out") if rel == REL_IMPORTS => result.imports.push(name.clone()),
+            (rel, "out") if rel == REL_IMPLEMENTS => result.implements.push(name.clone()),
+            (rel, "out") if rel == REL_EXPORTS => result.exports.push(name.clone()),
+            _ => {}
+        }
+    }
+    result
+}
+
+/// Regenerate context strings (and embeddings) for the given set of dirty nodes.
+///
+/// Ids without a node row are skipped. The strings are written in one
+/// transaction; embedding runs afterwards, only when `model` is given and the
+/// DB reports vector support. An empty `dirty_ids` returns without any DB work.
+pub(super) fn regenerate_context_strings(db: &Database, dirty_ids: &HashSet<i64>, model: Option<&EmbeddingModel>) -> Result<()> {
+    if dirty_ids.is_empty() {
+        return Ok(());
+    }
+    let tx = db.conn().unchecked_transaction()?;
+    let id_vec: Vec<i64> = dirty_ids.iter().copied().collect();
+    let all_edges = get_edges_batch(db.conn(), &id_vec)?;
+    let all_nodes: HashMap<i64, (NodeResult, String, Option<String>)> =
+        get_nodes_with_files_by_ids(db.conn(), &id_vec)?.into_iter()
+            .map(|nwf| (nwf.node.id, (nwf.node, nwf.file_path, nwf.language))).collect();
+
+    // Build all context strings first
+    let mut context_updates: Vec<(i64, String)> = Vec::with_capacity(dirty_ids.len());
+    for &node_id in dirty_ids {
+        let Some((node, file_path, language)) = all_nodes.get(&node_id) else { continue };
+        let cat = categorize_edges(all_edges.get(&node_id), format_route_from_metadata);
+        let ctx = build_context_string(&NodeContext {
+            node_type: node.node_type.clone(),
+            name: node.name.clone(),
+            qualified_name: node.qualified_name.clone(),
+            file_path: file_path.clone(),
+            language: language.clone(),
+            signature: node.signature.clone(),
+            return_type: node.return_type.clone(),
+            param_types: node.param_types.clone(),
+            code_content: Some(node.code_content.clone()),
+            routes: cat.routes,
+            callees: cat.callees,
+            callers: cat.callers,
+            inherits: cat.inherits,
+            imports: cat.imports,
+            implements: cat.implements,
+            exports: cat.exports,
+            doc_comment: node.doc_comment.clone(),
+        });
+        context_updates.push((node_id, ctx));
+    }
+
+    // Batch update context strings
+    update_context_strings_batch(db.conn(), &context_updates)?;
+    tx.commit()?;
+
+    // Embed outside the committed tx — recoverable on failure
+    if let Some(m) = model.filter(|_| db.vec_enabled()) {
+        embed_and_store_batch(db, m, &context_updates)?;
+    }
+    Ok(())
+}
+
+/// Rebuild context strings for nodes whose `context_string` is NULL, which a
+/// failed Phase 3 can leave behind. Called at startup after index verification.
+///
+/// Returns how many nodes received a rebuilt context string (0 when nothing
+/// was missing). Embeddings are refreshed too when `model` is provided and the
+/// database reports vector support.
+pub fn repair_null_context_strings(
+    db: &Database,
+    model: Option<&EmbeddingModel>,
+) -> Result<usize> {
+    let null_ids = get_nodes_missing_context(db.conn())?;
+    if null_ids.is_empty() {
+        return Ok(0);
+    }
+    tracing::info!("[repair] Found {} nodes with NULL context_string, rebuilding...", null_ids.len());
+
+    // Two batched reads: the node rows with their file path/language, then
+    // the edges recorded for those nodes.
+    let rows = get_nodes_with_files_by_ids(db.conn(), &null_ids)?;
+    let edges_by_node = get_edges_batch(db.conn(), &null_ids)?;
+
+    let rebuilt: Vec<(i64, String)> = rows
+        .iter()
+        .map(|row| {
+            let node = &row.node;
+            let cat = categorize_edges(edges_by_node.get(&node.id), format_route_from_metadata);
+            let ctx = build_context_string(&NodeContext {
+                node_type: node.node_type.clone(),
+                name: node.name.clone(),
+                qualified_name: node.qualified_name.clone(),
+                file_path: row.file_path.clone(),
+                language: row.language.clone(),
+                signature: node.signature.clone(),
+                return_type: node.return_type.clone(),
+                param_types: node.param_types.clone(),
+                code_content: Some(node.code_content.clone()),
+                routes: cat.routes,
+                callees: cat.callees,
+                callers: cat.callers,
+                inherits: cat.inherits,
+                imports: cat.imports,
+                implements: cat.implements,
+                exports: cat.exports,
+                doc_comment: node.doc_comment.clone(),
+            });
+            (node.id, ctx)
+        })
+        .collect();
+
+    let count = rebuilt.len();
+    if count > 0 {
+        // One explicit transaction avoids a per-row fsync under autocommit.
+        let tx = db.conn().unchecked_transaction()?;
+        update_context_strings_batch(db.conn(), &rebuilt)?;
+        tx.commit()?;
+
+        // Re-embed if model available
+        if let Some(m) = model {
+            if db.vec_enabled() {
+                embed_and_store_batch(db, m, &rebuilt)?;
+            }
+        }
+    }
+
+    tracing::info!("[repair] Repaired context strings for {} nodes", count);
+    Ok(count)
+}
diff --git a/src/indexer/pipeline/embed.rs b/src/indexer/pipeline/embed.rs
new file mode 100644
index 0000000..4dbfce0
--- /dev/null
+++ b/src/indexer/pipeline/embed.rs
@@ -0,0 +1,71 @@
+//! Batch embedding + vector store. Wraps `EmbeddingModel::embed_batch` with
+//! a per-batch DB transaction; on batch failure falls back to per-row embed
+//! so a single malformed input doesn't tank the whole sweep.
+//!
+//! Public so `mcp::server` can call it from the background embedding thread
+//! (separate from the indexer's foreground Phase 3 path).
+
+use anyhow::Result;
+
+use crate::embedding::model::EmbeddingModel;
+use crate::storage::db::Database;
+use crate::storage::queries::insert_node_vectors_batch;
+
+/// Embed context strings using batched inference and batch-insert vectors.
+/// Public so the background embedding thread in server.rs can call it.
+///
+/// `context_updates` pairs node ids with the text to embed; an empty slice is a
+/// no-op. If the batched call fails, each text is embedded on its own and rows
+/// that still fail are logged and skipped. Vectors are inserted inside a single
+/// transaction. Errors come only from the database write.
+pub fn embed_and_store_batch(db: &Database, model: &EmbeddingModel, context_updates: &[(i64, String)]) -> Result<()> {
+    if context_updates.is_empty() {
+        return Ok(());
+    }
+
+    let t0 = std::time::Instant::now();
+    let texts: Vec<&str> = context_updates.iter().map(|(_, ctx)| ctx.as_str()).collect();
+    // Pair every id with its vector; `fallback` records which path produced them.
+    let (vectors, fallback): (Vec<(i64, Vec<f32>)>, bool) = match model.embed_batch(&texts) {
+        Ok(embs) => (context_updates.iter().map(|(id, _)| *id).zip(embs).collect(), false),
+        Err(e) => {
+            tracing::warn!("Batch embed failed, falling back to sequential: {}", e);
+            let per_row = context_updates.iter().filter_map(|(id, text)| {
+                match model.embed(text.as_str()) {
+                    Ok(emb) => Some((*id, emb)),
+                    Err(e2) => {
+                        tracing::warn!("Failed to embed node {}: {}", id, e2);
+                        None
+                    }
+                }
+            });
+            (per_row.collect(), true)
+        }
+    };
+    let t_embed = t0.elapsed();
+
+    // `zip` silently truncates, so make a short result from the model visible
+    if !fallback && vectors.len() != context_updates.len() {
+        tracing::warn!("Batch embed returned {} vectors for {} nodes", vectors.len(), context_updates.len());
+    }
+
+    if !vectors.is_empty() {
+        let tx = db.conn().unchecked_transaction()?;
+        insert_node_vectors_batch(db.conn(), &vectors)?;
+        tx.commit()?;
+    }
+
+    if fallback {
+        // Report how many rows were actually stored, not how many were requested.
+        tracing::info!("[embed] {}/{} nodes (sequential fallback) in {:.1}s",
+            vectors.len(), context_updates.len(), t0.elapsed().as_secs_f64());
+    } else {
+        tracing::info!("[embed] {} nodes in {:.1}s (embed {:.1}s, store {:.1}s)",
+            context_updates.len(),
+            t0.elapsed().as_secs_f64(),
+            t_embed.as_secs_f64(),
+            (t0.elapsed() - t_embed).as_secs_f64(),
+        );
+    }
+    Ok(())
+}
diff --git a/src/indexer/pipeline/index_files.rs b/src/indexer/pipeline/index_files.rs
new file mode 100644
index 0000000..e4ea8db
--- /dev/null
+++ b/src/indexer/pipeline/index_files.rs
@@ -0,0 +1,827 @@
+//! Single-pass batched indexer. Phases share local state (transaction,
+//! atomics, batch_parsed, name_to_ids, global_name_map) so the function
+//! itself stays whole — the *helpers* that feed it (context, embedding,
+//! Python module map, ambiguity refinement, pending-call sweep) live in
+//! sibling modules.
+//!
+//! Phase outline:
+//! - 0: delete files; pre-cascade-buffer inbound calls into pending so
+//!   B → A.foo doesn't silently vanish when only A is in `delete_paths`.
+//! - 1a: parallel CPU work (read + parse + extract nodes) via rayon.
+//! - 1b: sequential DB inserts (file row, node rows; cascades old nodes).
+//! - 2: extract relations, resolve to edges with same-file → same-language
+//!   → drop/global tier order; buffer unresolved bare-name same-language
+//!   calls into pending instead of dropping; track external imports/symbols.
+//! - 2b / 2b-ext: virtual `<external>` nodes for unresolved imports/traits.
+//! - 2c: restore cross-file inbound edges that cascade-delete just stripped.
+//! - 3: build context strings (parallel), batch-update, then embed outside tx.
+//! - 2c sweep: drain `pending_unresolved_calls` against the new node state.
+
+use anyhow::Result;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+
+use rayon::prelude::*;
+
+use crate::embedding::context::{build_context_string, NodeContext};
+use crate::embedding::model::EmbeddingModel;
+use crate::indexer::merkle::hash_file;
+use crate::parser::relations::extract_relations_from_tree;
+use crate::parser::treesitter::{parse_tree, extract_nodes_from_tree};
+use crate::search::tokenizer::split_identifier;
+use crate::storage::db::Database;
+use crate::storage::queries::{
+    delete_files_by_paths, delete_nodes_by_file,
+    get_all_node_names_with_ids, get_edges_batch,
+    get_inbound_cross_file_edges,
+    get_nodes_by_file_path,
+    get_nodes_with_files_by_ids,
+    insert_edge_cached, insert_node_cached,
+    update_context_strings_batch, upsert_file,
+    FileRecord, NodeRecord, NodeResult,
+};
+use crate::domain::{REL_CALLS, REL_IMPORTS, REL_ROUTES_TO, REL_IMPLEMENTS, max_file_size, CROSS_FILE_CALL_NOISE};
+use crate::utils::config::detect_language;
+
+use super::{IndexResult, IndexStats, ProgressFn};
+use super::context::{categorize_edges, format_route_from_metadata};
+use super::embed::embed_and_store_batch;
+use super::python_modules::{build_python_module_map, resolve_python_module_targets};
+use super::resolve::{refine_ambiguous_targets, resolve_pending_calls};
+
+/// Files per streaming batch. Each batch runs Phase 1+2 in its own transaction,
+/// then drops heavyweight data (ASTs, source strings) before the next batch.
+const BATCH_SIZE: usize = 500;
+
+/// Lightweight per-file record kept once a batch commits — no Tree or source string.
+pub(super) struct FileIndexed {
+    pub rel_path: String, // path as listed in `files` (joined onto `root` when read)
+    pub node_ids: Vec<i64>, // ids of the inserted nodes; the `<module>` node comes first
+    pub node_names: Vec<String>, // node names, index-aligned with `node_ids`
+}
+
+pub(super) fn index_files(
+    db: &Database,
+    root: &Path,
+    files: &[String],
+    hashes: &HashMap<String, String>,
+    model: Option<&EmbeddingModel>,
+    delete_paths: &[String],
+    progress: Option<ProgressFn>,
+) -> Result<IndexResult> {
+    // SAFETY: unchecked_transaction is used because rusqlite's Transaction borrows
+    // &mut Connection, preventing other borrows during the transaction. Here we need
+    // both the transaction and read access via db.conn() (which returns &Connection
+    // to the same underlying connection). This is safe because:
+    // (1) db.conn() returns the same Connection the tx was opened on,
+    // (2) we never open nested transactions,
+    // (3) concurrent access (e.g. background embedding thread) uses separate
+    //     DB connections; safety relies on SQLite WAL mode + busy_timeout(5000),
+    //     not single-threadedness.
+
+    use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
+    let skipped_size = AtomicUsize::new(0);
+    let skipped_parse = AtomicUsize::new(0);
+    let skipped_read = AtomicUsize::new(0);
+    let skipped_hash = AtomicUsize::new(0);
+    let skipped_language = AtomicUsize::new(0);
+
+    let mut total_nodes_created = 0usize;
+    let mut total_edges_created = 0usize;
+    let mut all_indexed: Vec<FileIndexed> = Vec::new();
+
+    // Phase 0: Delete removed files in own transaction.
+    //
+    // Before cascade strips inbound REL_CALLS edges, capture them as pending
+    // rows. Without this, deleting file A wipes B's edge to A.foo and B is
+    // not in `delete_paths` (so Phase 2 won't re-extract it), leaving B with
+    // neither an edge nor a pending row — the same staleness window the
+    // "callee added later" buffering closes, just from the deletion side.
+    // Both directions need to round-trip through pending or the v0.18.2 fix
+    // is only half-complete.
+    if !delete_paths.is_empty() {
+        let tx = db.conn().unchecked_transaction()?;
+
+        // Resolve file IDs once (delete_files_by_paths drops them) so we can
+        // query inbound calls before cascade fires.
+        let mut deleted_file_ids: Vec<i64> = Vec::with_capacity(delete_paths.len());
+        for path in delete_paths {
+            if let Ok(Some(fid)) = db.conn().query_row(
+                "SELECT id FROM files WHERE path = ?1",
+                [path],
+                |row| row.get::<_, Option<i64>>(0),
+            ) {
+                deleted_file_ids.push(fid);
+            }
+        }
+
+        let mut buffered = 0usize;
+        for fid in &deleted_file_ids {
+            let inbound = crate::storage::queries::get_inbound_calls_for_pending(db.conn(), *fid)?;
+            for (source_id, target_name, source_language, metadata) in inbound {
+                crate::storage::queries::insert_pending_unresolved_call(
+                    db.conn(),
+                    source_id,
+                    &target_name,
+                    &source_language,
+                    metadata.as_deref(),
+                )?;
+                buffered += 1;
+            }
+        }
+        if buffered > 0 {
+            tracing::info!(
+                "[index] Phase 0: buffered {} inbound calls before cascade-deleting {} file(s)",
+                buffered, deleted_file_ids.len()
+            );
+        }
+
+        delete_files_by_paths(db.conn(), delete_paths)?;
+        tx.commit()?;
+    }
+
+    // CPU-bound parse result — produced in parallel, consumed sequentially for DB insert
+    struct FilePreParsed {
+        rel_path: String,
+        source: String,
+        language: String,
+        tree: tree_sitter::Tree,
+        hash: String,
+        last_modified: i64,
+        parsed_nodes: Vec<crate::parser::treesitter::ParsedNode>,
+    }
+
+    // Pre-build Python module map once (used in all batches for import resolution)
+    let mut all_python_paths: HashSet<String> = files.iter()
+        .filter(|f| f.ends_with(".py"))
+        .cloned()
+        .collect();
+    {
+        let mut stmt = db.conn().prepare("SELECT path FROM files WHERE path LIKE '%.py'")?;
+        let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
+        for row in rows {
+            all_python_paths.insert(row?);
+        }
+    }
+    let python_module_map = build_python_module_map(&all_python_paths);
+
+    // Pre-load global name->[(id, path, language)] map once before the batch loop.
+    // This avoids a full table scan per batch in Phase 2 relation resolution.
+    // The map is updated incrementally as each batch commits new nodes.
+    // `language` drives same-language-preferred resolution to avoid cross-language
+    // bare-name collisions (e.g. Rust `hasher.update()` resolving to JS `function update`).
+    let mut global_name_map: HashMap<String, Vec<crate::storage::queries::NameEntry>> =
+        get_all_node_names_with_ids(db.conn())?;
+
+    // Heavyweight per-file data used during Phase 1+2, dropped after each batch
+    #[allow(dead_code)]
+    struct FileParsed {
+        rel_path: String,
+        source: String,
+        language: String,
+        tree: tree_sitter::Tree,
+        file_id: i64,
+        node_ids: Vec<i64>,
+        node_names: Vec<String>,
+    }
+
+    // Process files in batches — each batch does Phase 1 + Phase 2
+    for batch in files.chunks(BATCH_SIZE) {
+        let tx = db.conn().unchecked_transaction()?;
+
+        // --- Phase 1a: Parallel CPU-bound work (read + parse + extract nodes) ---
+        let pre_parsed: Vec<FilePreParsed> = batch
+            .par_iter()
+            .filter_map(|rel_path| {
+                let language = match detect_language(rel_path) {
+                    Some(l) => l,
+                    None => {
+                        skipped_language.fetch_add(1, AtomicOrdering::Relaxed);
+                        return None;
+                    }
+                };
+                let abs_path = root.join(rel_path);
+
+                let file_meta = std::fs::metadata(&abs_path).ok();
+                if let Some(ref meta) = file_meta {
+                    if meta.len() > max_file_size() {
+                        tracing::debug!("Skipping large file ({} bytes): {}", meta.len(), rel_path);
+                        skipped_size.fetch_add(1, AtomicOrdering::Relaxed);
+                        return None;
+                    }
+                }
+
+                let source = match std::fs::read_to_string(&abs_path) {
+                    Ok(s) => s,
+                    Err(e) => {
+                        tracing::warn!("Skipping file {}: {}", rel_path, e);
+                        skipped_read.fetch_add(1, AtomicOrdering::Relaxed);
+                        return None;
+                    }
+                };
+
+                let hash = match hashes.get(rel_path.as_str()) {
+                    Some(h) => h.clone(),
+                    None => match hash_file(&abs_path) {
+                        Ok(h) => h,
+                        Err(e) => {
+                            tracing::warn!("Skipping file (hash error): {}: {}", rel_path, e);
+                            skipped_hash.fetch_add(1, AtomicOrdering::Relaxed);
+                            return None;
+                        }
+                    },
+                };
+
+                let tree = match parse_tree(&source, language) {
+                    Ok(t) => t,
+                    Err(e) => {
+                        tracing::warn!("Parse failed for {}: {}", rel_path, e);
+                        skipped_parse.fetch_add(1, AtomicOrdering::Relaxed);
+                        return None;
+                    }
+                };
+
+                let last_modified = file_meta
+                    .and_then(|m| m.modified().ok())
+                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
+                    .map(|d| d.as_secs() as i64)
+                    .unwrap_or(0);
+
+                let parsed_nodes = extract_nodes_from_tree(&tree, &source, language);
+
+                Some(FilePreParsed {
+                    rel_path: rel_path.clone(),
+                    source,
+                    language: language.to_string(),
+                    tree,
+                    hash,
+                    last_modified,
+                    parsed_nodes,
+                })
+            })
+            .collect();
+
+        let mut batch_parsed: Vec<FileParsed> = Vec::new();
+        // Saved inbound edges from other files → batch files (to restore after cascade delete)
+        // Tuple: (source_id, source_file_id, target_name, relation, metadata)
+        let mut saved_inbound_edges: Vec<(i64, i64, String, String, Option<String>)> = Vec::new();
+        // Track file_ids in this batch to filter intra-batch edges in Phase 2c
+        let mut batch_file_ids: HashSet<i64> = HashSet::new();
+
+        // --- Phase 1b: Sequential DB inserts ---
+        for pp in pre_parsed {
+            let file_id = upsert_file(db.conn(), &FileRecord {
+                path: pp.rel_path.clone(),
+                blake3_hash: pp.hash,
+                last_modified: pp.last_modified,
+                language: Some(pp.language.clone()),
+            })?;
+
+            // Save cross-file inbound edges before cascade delete destroys them
+            saved_inbound_edges.extend(get_inbound_cross_file_edges(db.conn(), file_id)?);
+            batch_file_ids.insert(file_id);
+
+            delete_nodes_by_file(db.conn(), file_id)?;
+
+            let mut node_ids = Vec::new();
+            let mut node_names = Vec::new();
+
+            let module_node_id = insert_node_cached(db.conn(), &NodeRecord {
+                file_id,
+                node_type: "module".into(),
+                name: "<module>".into(),
+                qualified_name: Some(pp.rel_path.clone()),
+                start_line: 1,
+                end_line: pp.source.lines().count() as i64,
+                code_content: String::new(),
+                signature: None,
+                doc_comment: None,
+                context_string: None,
+                name_tokens: None,
+                return_type: None,
+                param_types: None,
+                is_test: false,
+            })?;
+            node_ids.push(module_node_id);
+            node_names.push("<module>".into());
+            total_nodes_created += 1;
+
+            for pn in &pp.parsed_nodes {
+                let name_tokens = split_identifier(&pn.name);
+                let node_id = insert_node_cached(db.conn(), &NodeRecord {
+                    file_id,
+                    node_type: pn.node_type.clone(),
+                    name: pn.name.clone(),
+                    qualified_name: pn.qualified_name.clone(),
+                    start_line: pn.start_line as i64,
+                    end_line: pn.end_line as i64,
+                    code_content: pn.code_content.clone(),
+                    signature: pn.signature.clone(),
+                    doc_comment: pn.doc_comment.clone(),
+                    context_string: None,
+                    name_tokens: Some(name_tokens),
+                    return_type: pn.return_type.clone(),
+                    param_types: pn.param_types.clone(),
+                    is_test: pn.is_test,
+                })?;
+                node_ids.push(node_id);
+                node_names.push(pn.name.clone());
+                total_nodes_created += 1;
+            }
+
+            batch_parsed.push(FileParsed {
+                rel_path: pp.rel_path,
+                source: pp.source,
+                language: pp.language,
+                tree: pp.tree,
+                file_id,
+                node_ids,
+                node_names,
+            });
+        }
+
+        // --- Phase 2: Extract relations + insert edges ---
+        // Build per-batch name_to_ids and node_id_to_path from the pre-loaded global map,
+        // excluding files in the current batch (their old nodes were deleted in Phase 1b).
+        let batch_file_paths: HashSet<&str> = batch_parsed.iter()
+            .map(|pf| pf.rel_path.as_str()).collect();
+
+        let mut name_to_ids: HashMap<String, Vec<i64>> = HashMap::new();
+        let mut node_id_to_path: HashMap<i64, String> = HashMap::new();
+        // Per-node language for same-language-preferred edge resolution (§ cross-lang collision).
+        let mut node_id_to_language: HashMap<i64, Option<String>> = HashMap::new();
+
+        // Add current batch's newly inserted nodes
+        for pf in &batch_parsed {
+            for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
+                name_to_ids.entry(name.clone()).or_default().push(*id);
+                node_id_to_path.insert(*id, pf.rel_path.clone());
+                node_id_to_language.insert(*id, Some(pf.language.clone()));
+            }
+        }
+
+        // Add nodes from the global map, excluding those in current batch's files
+        // (their old nodes were deleted and replaced by new ones above)
+        for (name, entries) in &global_name_map {
+            for (id, path, language) in entries {
+                if !batch_file_paths.contains(path.as_str()) {
+                    name_to_ids.entry(name.clone()).or_default().push(*id);
+                    node_id_to_path.insert(*id, path.clone());
+                    node_id_to_language.insert(*id, language.clone());
+                }
+            }
+        }
+
+        for ids in name_to_ids.values_mut() {
+            ids.sort();
+            ids.dedup();
+        }
+
+        // Track unresolved external Python imports: (source_module_node_id, module_name)
+        let mut external_python_imports: Vec<(i64, String)> = Vec::new();
+        // Track unresolved external symbols for sentinel node creation:
+        // (source_id, target_name, relation) — e.g., implements edges to external traits
+        let mut unresolved_externals: Vec<(i64, String, String)> = Vec::new();
+
+        for pf in &batch_parsed {
+            let relations = extract_relations_from_tree(&pf.tree, &pf.source, &pf.language);
+            let local_ids: HashSet<i64> = pf.node_ids.iter().copied().collect();
+
+            for rel in &relations {
+                // Contract: extract_relations_from_tree stamps every relation with
+                // source_language equal to the language argument. The
+                // same-language resolution further below depends on it. Hard
+                // error instead of debug_assert so a parser regression fails
+                // loudly in release builds too (one string compare per
+                // relation is negligible against the SQL writes below).
+                if rel.source_language != pf.language {
+                    anyhow::bail!(
+                        "ParsedRelation.source_language ({}) does not match file language ({}); \
+                         parser regressed the source_language contract",
+                        rel.source_language, pf.language
+                    );
+                }
+
+                let source_ids = pf.node_names.iter()
+                    .zip(pf.node_ids.iter())
+                    .filter(|(name, _)| *name == &rel.source_name)
+                    .map(|(_, id)| *id)
+                    .collect::<Vec<_>>();
+
+                // Try Python module-constrained resolution for import edges
+                if rel.relation == REL_IMPORTS {
+                    if let Some(ref meta_str) = rel.metadata {
+                        if let Ok(meta) = serde_json::from_str::<serde_json::Value>(meta_str) {
+                            if let Some(python_module) = meta.get("python_module").and_then(|v| v.as_str()) {
+                                let is_module_import = meta.get("is_module_import")
+                                    .and_then(|v| v.as_bool()).unwrap_or(false);
+                                if python_module_map.contains_key(python_module) {
+                                    // Internal module — try constrained resolution
+                                    if let Some(module_targets) = resolve_python_module_targets(
+                                        python_module, is_module_import, &rel.target_name,
+                                        &python_module_map, &node_id_to_path, &name_to_ids,
+                                    ) {
+                                        for &src_id in &source_ids {
+                                            for &tgt_id in &module_targets {
+                                                if src_id != tgt_id
+                                                    && insert_edge_cached(db.conn(), src_id, tgt_id, &rel.relation, rel.metadata.as_deref())? {
+                                                    total_edges_created += 1;
+                                                }
+                                            }
+                                        }
+                                        continue;
+                                    }
+                                    // Module found but symbol not found — fall through to default
+                                } else {
+                                    // External module — track for virtual node creation.
+                                    // For `from X import Y`, we track the module-level dependency (X),
+                                    // not the individual symbol (Y), since we can't index external code.
+                                    for &src_id in &source_ids {
+                                        external_python_imports.push((src_id, python_module.to_string()));
+                                    }
+                                    continue; // No point in default resolution for external imports
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // Default resolution: global name-based lookup with language-aware layering.
+                // Tier order: same-file → same-language → (calls: drop) / (other: global).
+                // Dropping calls without a same-language match prevents Rust `hasher.update()`
+                // binding to an unrelated JS `function update()` via bare-name collision.
+                let all_target_ids = name_to_ids.get(&rel.target_name)
+                    .cloned()
+                    .unwrap_or_default();
+
+                let same_file_targets: Vec<i64> = all_target_ids.iter()
+                    .filter(|id| local_ids.contains(id))
+                    .copied()
+                    .collect();
+
+                let source_lang = pf.language.as_str();
+                let same_language_targets: Vec<i64> = all_target_ids.iter()
+                    .filter(|id| !local_ids.contains(id))
+                    .filter(|id| matches!(
+                        node_id_to_language.get(id).and_then(|l| l.as_deref()),
+                        Some(l) if l == source_lang
+                    ))
+                    .copied()
+                    .collect();
+
+                let target_ids = if !same_file_targets.is_empty() {
+                    same_file_targets
+                } else if rel.relation == REL_CALLS
+                    && CROSS_FILE_CALL_NOISE.contains(&rel.target_name.as_str())
+                {
+                    // Stdlib method names (new/default/from) — drop regardless of language.
+                    continue;
+                } else if !same_language_targets.is_empty() {
+                    // Ambiguous cross-file same-language candidates (e.g. a helper
+                    // name like `readJson` defined in multiple JS files) used to
+                    // fan out — every same-name target got an edge, producing
+                    // phantom callers across unrelated modules. Refine by
+                    // non-test preference + longest common path prefix with the
+                    // caller file. See `refine_ambiguous_targets` for fallback
+                    // policy (keeps remaining pool on ambiguity to avoid
+                    // regressing dead-code on bare-name Rust scoped calls).
+                    refine_ambiguous_targets(
+                        &same_language_targets,
+                        &pf.rel_path,
+                        &node_id_to_path,
+                    )
+                } else if rel.relation == REL_CALLS {
+                    // No same-file, no same-language candidate → buffer in
+                    // pending_unresolved_calls instead of silently dropping.
+                    // The post-Phase-2 sweep below promotes the row to a real
+                    // edge as soon as a same-language target appears (e.g.
+                    // sibling file added in a later incremental pass). Memory
+                    // `feedback_incremental_edge_timing.md` documented the bug
+                    // this closes: B's bare-name call to `foo()` got dropped
+                    // when foo didn't exist yet, and never re-resolved when A
+                    // later added `foo`. Schema cascade on source_id self-cleans
+                    // when callers are removed/reindexed.
+                    for &src_id in &source_ids {
+                        crate::storage::queries::insert_pending_unresolved_call(
+                            db.conn(),
+                            src_id,
+                            &rel.target_name,
+                            &pf.language,
+                            rel.metadata.as_deref(),
+                        )?;
+                    }
+                    continue;
+                } else {
+                    all_target_ids
+                };
+
+                if target_ids.is_empty()
+                    && (rel.relation == REL_IMPLEMENTS || rel.relation == REL_IMPORTS)
+                {
+                    // Unresolved implements target (external trait like Write, Default)
+                    // OR unresolved import target (JS `require('fs')`, unresolved JS
+                    // ES-import binding). Phase 2b-ext creates `<external>/<name>`
+                    // sentinel nodes so the dependency graph shows the link.
+                    for &src_id in &source_ids {
+                        unresolved_externals.push((src_id, rel.target_name.clone(), rel.relation.clone()));
+                    }
+                } else {
+                    for &src_id in &source_ids {
+                        for &tgt_id in &target_ids {
+                            if (src_id != tgt_id || rel.relation == REL_ROUTES_TO)
+                                && insert_edge_cached(db.conn(), src_id, tgt_id, &rel.relation, rel.metadata.as_deref())? {
+                                total_edges_created += 1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Phase 2b: Create virtual nodes for external Python imports
+        if !external_python_imports.is_empty() {
+            let ext_file_id = upsert_file(db.conn(), &FileRecord {
+                path: "<external>".into(),
+                blake3_hash: "external".into(),
+                last_modified: 0,
+                language: Some("external".into()),
+            })?;
+
+            // Load existing external module nodes to avoid duplicates
+            let existing_ext_nodes: HashMap<String, i64> =
+                get_nodes_by_file_path(db.conn(), "<external>")?
+                    .into_iter()
+                    .map(|n| (n.name.clone(), n.id))
+                    .collect();
+
+            let unique_modules: HashSet<String> = external_python_imports.iter()
+                .map(|(_, m)| m.clone()).collect();
+
+            let mut ext_node_ids: HashMap<String, i64> = existing_ext_nodes;
+            for module_name in &unique_modules {
+                if !ext_node_ids.contains_key(module_name) {
+                    let node_id = insert_node_cached(db.conn(), &NodeRecord {
+                        file_id: ext_file_id,
+                        node_type: "external_module".into(),
+                        name: module_name.clone(),
+                        qualified_name: Some(format!("<external>/{}", module_name)),
+                        start_line: 0,
+                        end_line: 0,
+                        code_content: String::new(),
+                        signature: None,
+                        doc_comment: None,
+                        context_string: None,
+                        name_tokens: None,
+                        return_type: None,
+                        param_types: None,
+                        is_test: false,
+                    })?;
+                    ext_node_ids.insert(module_name.clone(), node_id);
+                    total_nodes_created += 1;
+                }
+            }
+
+            for (source_id, module_name) in &external_python_imports {
+                if let Some(&ext_id) = ext_node_ids.get(module_name) {
+                    if insert_edge_cached(db.conn(), *source_id, ext_id, REL_IMPORTS, None)? {
+                        total_edges_created += 1;
+                    }
+                }
+            }
+        }
+
+        // Phase 2b-ext: Create sentinel nodes for unresolved external symbols
+        // (e.g., Rust `impl Write for SharedStdout` where Write is from std::io)
+        if !unresolved_externals.is_empty() {
+            let ext_file_id = upsert_file(db.conn(), &FileRecord {
+                path: "<external>".into(),
+                blake3_hash: "external".into(),
+                last_modified: 0,
+                language: Some("external".into()),
+            })?;
+
+            let existing_ext_nodes: HashMap<String, i64> =
+                get_nodes_by_file_path(db.conn(), "<external>")?
+                    .into_iter()
+                    .map(|n| (n.name.clone(), n.id))
+                    .collect();
+
+            let mut ext_node_ids: HashMap<String, i64> = existing_ext_nodes;
+
+            // Collect unique targets with inferred type
+            let unique_targets: HashMap<&str, &str> = unresolved_externals.iter()
+                .map(|(_, name, rel)| {
+                    let node_type = if rel == REL_IMPLEMENTS { "trait" } else { "module" };
+                    (name.as_str(), node_type)
+                })
+                .collect();
+
+            for (&name, &node_type) in &unique_targets {
+                if !ext_node_ids.contains_key(name) {
+                    let node_id = insert_node_cached(db.conn(), &NodeRecord {
+                        file_id: ext_file_id,
+                        node_type: node_type.into(),
+                        name: name.into(),
+                        qualified_name: Some(format!("<external>/{}", name)),
+                        start_line: 0,
+                        end_line: 0,
+                        code_content: String::new(),
+                        signature: None,
+                        doc_comment: None,
+                        context_string: None,
+                        name_tokens: None,
+                        return_type: None,
+                        param_types: None,
+                        is_test: false,
+                    })?;
+                    ext_node_ids.insert(name.into(), node_id);
+                    total_nodes_created += 1;
+                }
+            }
+
+            for (source_id, target_name, relation) in &unresolved_externals {
+                if let Some(&ext_id) = ext_node_ids.get(target_name.as_str()) {
+                    if insert_edge_cached(db.conn(), *source_id, ext_id, relation, None)? {
+                        total_edges_created += 1;
+                    }
+                }
+            }
+        }
+
+        // Phase 2c: Restore cross-file inbound edges lost to cascade delete.
+        // When a file is re-indexed, its old nodes are deleted (cascade-deleting edges).
+        // Edges from OTHER files into the re-indexed file must be rebuilt using new node IDs.
+        if !saved_inbound_edges.is_empty() {
+            // Build name → new_node_id map for batch files only
+            let mut batch_name_to_ids: HashMap<&str, Vec<i64>> = HashMap::new();
+            for pf in &batch_parsed {
+                for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
+                    batch_name_to_ids.entry(name.as_str()).or_default().push(*id);
+                }
+            }
+
+            let mut restored = 0usize;
+            let mut skipped_intra_batch = 0usize;
+            for (source_id, source_file_id, target_name, relation, metadata) in &saved_inbound_edges {
+                // Source file is also in this batch — source_id is stale (deleted + re-created).
+                // Phase 2 already resolves cross-file edges for intra-batch files.
+                if batch_file_ids.contains(source_file_id) {
+                    skipped_intra_batch += 1;
+                    continue;
+                }
+                if let Some(new_target_ids) = batch_name_to_ids.get(target_name.as_str()) {
+                    for &new_tgt_id in new_target_ids {
+                        if *source_id != new_tgt_id
+                            && insert_edge_cached(db.conn(), *source_id, new_tgt_id, relation, metadata.as_deref())? {
+                            total_edges_created += 1;
+                            restored += 1;
+                        }
+                    }
+                }
+            }
+            if restored > 0 || skipped_intra_batch > 0 {
+                tracing::debug!("[index] Restored {} cross-file inbound edges, skipped {} intra-batch", restored, skipped_intra_batch);
+            }
+        }
+
+        tx.commit()?;
+
+        let batch_file_count = batch_parsed.len();
+
+        // Update global_name_map: remove old entries for batch files, add new ones
+        for (_, entries) in global_name_map.iter_mut() {
+            entries.retain(|(_id, path, _lang)| !batch_file_paths.contains(path.as_str()));
+        }
+        global_name_map.retain(|_, entries| !entries.is_empty());
+
+        // Convert to lightweight records — drops Tree and source string
+        for pf in batch_parsed {
+            // Add newly committed nodes to the global map
+            let pf_lang = Some(pf.language.clone());
+            for (id, name) in pf.node_ids.iter().zip(pf.node_names.iter()) {
+                global_name_map.entry(name.clone())
+                    .or_default()
+                    .push((*id, pf.rel_path.clone(), pf_lang.clone()));
+            }
+            all_indexed.push(FileIndexed {
+                rel_path: pf.rel_path,
+                node_ids: pf.node_ids,
+                node_names: pf.node_names,
+            });
+            // pf.tree and pf.source are dropped here — memory freed
+        }
+
+        // Report progress after each batch
+        if let Some(cb) = progress {
+            cb(all_indexed.len(), files.len());
+        }
+
+        if files.len() > BATCH_SIZE {
+            tracing::info!(
+                "[index] batch {}/{}: {} files ({} nodes, {} edges)",
+                all_indexed.len(), files.len(),
+                batch_file_count, total_nodes_created, total_edges_created
+            );
+        }
+    }
+
+    // Phase 3: Build context strings + embeddings (single transaction, lightweight)
+    if !all_indexed.is_empty() {
+        let tx = db.conn().unchecked_transaction()?;
+        let all_node_ids: Vec<i64> = all_indexed.iter()
+            .flat_map(|fi| fi.node_ids.iter().copied()).collect();
+        let all_edges = get_edges_batch(db.conn(), &all_node_ids)?;
+        let all_node_details: HashMap<i64, (NodeResult, Option<String>)> = {
+            let nodes = get_nodes_with_files_by_ids(db.conn(), &all_node_ids)?;
+            nodes.into_iter().map(|nwf| (nwf.node.id, (nwf.node, nwf.language))).collect()
+        };
+
+        // Phase 3a: Build all context strings (CPU-bound, parallelized with rayon)
+        // Flatten to (node_id, node_name, file_path) tuples for parallel iteration
+        let node_tasks: Vec<(i64, &str, &str)> = all_indexed.iter()
+            .flat_map(|fi| {
+                fi.node_ids.iter().enumerate().map(move |(idx, &node_id)| {
+                    (node_id, fi.node_names[idx].as_str(), fi.rel_path.as_str())
+                })
+            })
+            .collect();
+
+        let context_updates: Vec<(i64, String)> = node_tasks.par_iter()
+            .map(|&(node_id, node_name, file_path)| {
+                let edges = all_edges.get(&node_id);
+                let cat = categorize_edges(edges, format_route_from_metadata);
+                let node_detail = all_node_details.get(&node_id);
+
+                let ctx = build_context_string(&NodeContext {
+                    node_type: node_detail.map(|(n, _)| n.node_type.clone()).unwrap_or_default(),
+                    name: node_name.to_string(),
+                    qualified_name: node_detail.and_then(|(n, _)| n.qualified_name.clone()),
+                    file_path: file_path.to_string(),
+                    language: node_detail.and_then(|(_, lang)| lang.clone()),
+                    signature: node_detail.and_then(|(n, _)| n.signature.clone()),
+                    return_type: node_detail.and_then(|(n, _)| n.return_type.clone()),
+                    param_types: node_detail.and_then(|(n, _)| n.param_types.clone()),
+                    code_content: node_detail.map(|(n, _)| n.code_content.clone()),
+                    routes: cat.routes,
+                    callees: cat.callees,
+                    callers: cat.callers,
+                    inherits: cat.inherits,
+                    imports: cat.imports,
+                    implements: cat.implements,
+                    exports: cat.exports,
+                    doc_comment: node_detail.and_then(|(n, _)| n.doc_comment.clone()),
+                });
+
+                (node_id, ctx)
+            })
+            .collect();
+
+        // Phase 3b: Batch update context strings in DB
+        update_context_strings_batch(db.conn(), &context_updates)?;
+        tx.commit()?;
+
+        tracing::info!(
+            "[index] Phase 3: context strings built for {} nodes",
+            all_node_ids.len()
+        );
+
+        // Phase 3c: Embed outside the committed tx — recoverable on failure via repair_null_context_strings
+        if let Some(m) = model {
+            if db.vec_enabled() {
+                embed_and_store_batch(db, m, &context_updates)?;
+            }
+        }
+    }
+
+    // Phase 2c: sweep pending_unresolved_calls — promote any rows whose
+    // target_name now resolves against a same-language node. Cheap when the
+    // table is empty (typical after a full index of a self-contained codebase).
+    let pending_resolved = resolve_pending_calls(db)?;
+    total_edges_created += pending_resolved;
+    if pending_resolved > 0 {
+        tracing::info!(
+            "[index] Phase 2c: resolved {} pending unresolved calls",
+            pending_resolved
+        );
+    }
+
+    // Optimize query planner statistics after bulk writes
+    if !all_indexed.is_empty() {
+        let _ = db.run_optimize();
+    }
+
+    let stats = IndexStats {
+        files_skipped_size: skipped_size.load(AtomicOrdering::Relaxed),
+        files_skipped_parse: skipped_parse.load(AtomicOrdering::Relaxed),
+        files_skipped_read: skipped_read.load(AtomicOrdering::Relaxed),
+        files_skipped_hash: skipped_hash.load(AtomicOrdering::Relaxed),
+        files_skipped_language: skipped_language.load(AtomicOrdering::Relaxed),
+    };
+
+    Ok(IndexResult {
+        files_indexed: all_indexed.len(),
+        nodes_created: total_nodes_created,
+        edges_created: total_edges_created,
+        stats,
+    })
+}
diff --git a/src/indexer/pipeline/mod.rs b/src/indexer/pipeline/mod.rs
new file mode 100644
index 0000000..d2dec7d
--- /dev/null
+++ b/src/indexer/pipeline/mod.rs
@@ -0,0 +1,237 @@
+//! Indexer pipeline. Public entry points + per-concern submodules:
+//! - `embed`: batch embedding store
+//! - `context`: context-string assembly + recovery paths
+//! - `python_modules`: dotted-path → file-path resolution map
+//! - `resolve`: ambiguous-target refinement + pending-call sweep
+//! - `index_files`: the giant Phase-0..3 orchestrator (kept whole — its
+//!   phases share local transaction/atomics/batch state)
+
+use anyhow::Result;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+
+use crate::embedding::model::EmbeddingModel;
+use crate::indexer::merkle::{compute_diff, scan_directory, scan_directory_cached, DirectoryCache};
+use crate::storage::db::Database;
+use crate::storage::queries::{
+    delete_files_by_paths, get_all_file_hashes, get_dirty_node_ids,
+};
+
+mod embed;
+mod context;
+mod python_modules;
+mod resolve;
+mod index_files;
+
+#[cfg(test)]
+mod tests;
+
+pub use embed::embed_and_store_batch;
+pub use context::repair_null_context_strings;
+
+use context::regenerate_context_strings;
+use index_files::index_files;
+
+/// Counters for indexing observability — one tally per reason a file was skipped.
+#[derive(Debug, Clone, Default)]
+pub struct IndexStats {
+    pub files_skipped_size: usize, // presumably files rejected for their size — confirm in index_files
+    pub files_skipped_parse: usize, // presumably files whose parse failed — confirm in index_files
+    pub files_skipped_read: usize, // presumably files that could not be read — confirm in index_files
+    pub files_skipped_hash: usize, // NOTE(review): hash-related skip; exact condition not visible here
+    pub files_skipped_language: usize, // presumably files with no detected language — confirm
+}
+
+pub struct IndexResult { // totals reported by one `index_files` run
+    pub files_indexed: usize, // number of files recorded as indexed during the run
+    pub nodes_created: usize, // running total of nodes inserted during the run
+    pub edges_created: usize, // edges inserted, including pending calls resolved by the final sweep
+    pub stats: IndexStats, // skip counters gathered during the same run
+}
+
+/// Progress callback: called with (files_done, files_total) after each batch.
+pub type ProgressFn<'a> = &'a dyn Fn(usize, usize);
+
+/// Index every file reported by a fresh scan of `project_root`; the deleted-files list passed on is empty.
+pub fn run_full_index(db: &Database, project_root: &Path, model: Option<&EmbeddingModel>, progress: Option<ProgressFn>) -> Result<IndexResult> {
+    let scanned = scan_directory(project_root)?;
+    index_files(db, project_root, &scanned.keys().cloned().collect::<Vec<String>>(), &scanned, model, &[], progress)
+}
+
+/// Reindex a single file when its on-disk hash differs from the stored hash.
+/// No-op when the hashes match (or `rel_path` was never indexed in a way that
+/// would currently reindex it). Returns true when a reindex (or stale-row
+/// cleanup) actually fired.
+///
+/// Used by query-time freshness: when an MCP tool receives an explicit
+/// `file_path` argument, the agent is signaling "I just edited this; please
+/// answer against the current bytes." The 30s `last_incremental_check`
+/// debounce in the server is too coarse for tight Edit→search loops.
+///
+/// Cross-file dirty-edge handling mirrors `run_incremental_index`: collect
+/// dirty node IDs **before** re-indexing (cascade delete strips old edges),
+/// then regenerate context strings + embeddings once the new nodes exist.
+///
+/// # Errors
+/// Hashing, indexing and database failures propagate; only a missing row counts as "not indexed".
+pub fn ensure_file_indexed(
+    db: &Database,
+    project_root: &Path,
+    rel_path: &str,
+    model: Option<&EmbeddingModel>,
+) -> Result<bool> {
+    use rusqlite::OptionalExtension;
+
+    let abs_path = project_root.join(rel_path);
+
+    // Missing-file path: drop stale row so future queries don't return phantom nodes.
+    if !abs_path.is_file() {
+        let exists_in_db: Option<i64> = db.conn()
+            .query_row("SELECT id FROM files WHERE path = ?1", [rel_path], |row| row.get(0))
+            .optional()?;
+        if exists_in_db.is_none() {
+            return Ok(false);
+        }
+        let tx = db.conn().unchecked_transaction()?;
+        delete_files_by_paths(db.conn(), &[rel_path.to_string()])?;
+        tx.commit()?;
+        return Ok(true);
+    }
+
+    // Skip files we wouldn't index in the first place (binary / wrong language).
+    if crate::utils::config::detect_language(rel_path).is_none() {
+        return Ok(false);
+    }
+
+    let on_disk_hash = crate::indexer::merkle::hash_file(&abs_path)?;
+    // A NULL stored hash reads as `None` (forcing a reindex) instead of a type error.
+    let stored_hash: Option<String> = db.conn()
+        .query_row("SELECT blake3_hash FROM files WHERE path = ?1", [rel_path], |row| row.get::<_, Option<String>>(0))
+        .optional()?
+        .flatten();
+    if stored_hash.as_deref() == Some(&on_disk_hash) {
+        return Ok(false);
+    }
+
+    // Cross-file edges into this file's nodes need their context strings rebuilt
+    // *after* the node IDs are replaced — capture the dirty set BEFORE re-indexing.
+    let files = vec![rel_path.to_string()];
+    let dirty_node_ids = collect_dirty_node_ids(db, &files)?;
+
+    let hashes = HashMap::from([(rel_path.to_string(), on_disk_hash)]);
+    index_files(db, project_root, &files, &hashes, model, &[], None)?;
+    if !dirty_node_ids.is_empty() {
+        regenerate_context_strings(db, &dirty_node_ids, model)?;
+    }
+    Ok(true)
+}
+
+/// Run `index_files` for `to_index`, then rebuild the context strings of nodes in
+/// OTHER files whose edges pointed into those files. The dirty set is captured
+/// before indexing because re-indexing cascade-deletes the old edges.
+/// Arguments are forwarded to `index_files` unchanged.
+fn index_files_refreshing_dirty(
+    db: &Database,
+    project_root: &Path,
+    to_index: &[String],
+    current_hashes: &HashMap<String, String>,
+    model: Option<&EmbeddingModel>,
+    deleted_files: &[String],
+    progress: Option<ProgressFn>,
+) -> Result<IndexResult> {
+    let dirty_node_ids = if to_index.is_empty() {
+        HashSet::new()
+    } else {
+        collect_dirty_node_ids(db, to_index)?
+    };
+    let result = index_files(db, project_root, to_index, current_hashes, model, deleted_files, progress)?;
+    if !dirty_node_ids.is_empty() {
+        regenerate_context_strings(db, &dirty_node_ids, model)?;
+    }
+    Ok(result)
+}
+
+/// Log the `[incremental]` summary line, but only when files were indexed or deleted.
+fn log_incremental_summary(result: &IndexResult, deleted: usize, start: std::time::Instant) {
+    if result.files_indexed > 0 || deleted > 0 {
+        tracing::info!(
+            "[incremental] {} files changed, {} deleted, {} nodes, {} edges, {:.1}s",
+            result.files_indexed, deleted,
+            result.nodes_created, result.edges_created,
+            start.elapsed().as_secs_f64()
+        );
+    }
+}
+
+/// Incremental index: hash-scan `project_root`, diff against the stored hashes,
+/// then index new/changed files and hand the deleted ones to `index_files`.
+pub fn run_incremental_index(db: &Database, project_root: &Path, model: Option<&EmbeddingModel>, progress: Option<ProgressFn>) -> Result<IndexResult> {
+    let start = std::time::Instant::now();
+    let stored_hashes = get_all_file_hashes(db.conn())?;
+    let current_hashes = scan_directory(project_root)?;
+    let diff = compute_diff(&stored_hashes, &current_hashes);
+
+    // Preserve <external> pseudo-file across incremental indexes
+    let deleted_files: Vec<String> = diff.deleted_files.into_iter()
+        .filter(|p| p != "<external>")
+        .collect();
+    let to_index: Vec<String> = [diff.new_files, diff.changed_files].concat();
+    let result = index_files_refreshing_dirty(db, project_root, &to_index, &current_hashes, model, &deleted_files, progress)?;
+    log_incremental_summary(&result, deleted_files.len(), start);
+    Ok(result)
+}
+
+/// Incremental index with directory mtime cache for faster scanning.
+/// Files in unchanged directories are skipped entirely.
+/// Returns the index result together with the `DirectoryCache` produced by this scan.
+pub fn run_incremental_index_cached(
+    db: &Database,
+    project_root: &Path,
+    model: Option<&EmbeddingModel>,
+    dir_cache: Option<&DirectoryCache>,
+    progress: Option<ProgressFn>,
+) -> Result<(IndexResult, DirectoryCache)> {
+    let start = std::time::Instant::now();
+    let stored_hashes = get_all_file_hashes(db.conn())?;
+    let (mut current_hashes, new_cache) = scan_directory_cached(project_root, dir_cache)?;
+
+    // Merge stored hashes for files in unchanged directories.
+    // scan_directory_cached skips files in unchanged dirs, so we need to
+    // carry forward their stored hashes to prevent false "deleted" diffs.
+    // Use new_cache.file_mtimes (populated for ALL walked files) to check existence
+    // without per-file stat calls.
+    for (path, hash) in &stored_hashes {
+        if !current_hashes.contains_key(path) && new_cache.file_exists(path) {
+            current_hashes.insert(path.clone(), hash.clone());
+        }
+    }
+
+    let diff = compute_diff(&stored_hashes, &current_hashes);
+
+    // Preserve <external> pseudo-file across incremental indexes
+    let deleted_files: Vec<String> = diff.deleted_files.into_iter()
+        .filter(|p| p != "<external>")
+        .collect();
+    let to_index: Vec<String> = [diff.new_files, diff.changed_files].concat();
+    let result = index_files_refreshing_dirty(db, project_root, &to_index, &current_hashes, model, &deleted_files, progress)?;
+    log_incremental_summary(&result, deleted_files.len(), start);
+    Ok((result, new_cache))
+}
+
+/// Collect node IDs in OTHER files that have edges pointing to nodes in the changed files.
+/// Must be called BEFORE re-indexing (cascade delete removes old edges).
+/// Paths with no `files` row are skipped; any other lookup error is returned, not swallowed.
+fn collect_dirty_node_ids(db: &Database, changed_paths: &[String]) -> Result<HashSet<i64>> {
+    use rusqlite::OptionalExtension;
+
+    // Prepare the lookup once and reuse it for every path.
+    let mut stmt = db.conn().prepare("SELECT id FROM files WHERE path = ?1")?;
+    let mut changed_file_ids = Vec::with_capacity(changed_paths.len());
+    for path in changed_paths {
+        if let Some(id) = stmt.query_row([path], |row| row.get::<_, i64>(0)).optional()? {
+            changed_file_ids.push(id);
+        }
+    }
+    let ids = get_dirty_node_ids(db.conn(), &changed_file_ids)?;
+    Ok(ids.into_iter().collect())
+}
diff --git a/src/indexer/pipeline/python_modules.rs b/src/indexer/pipeline/python_modules.rs
new file mode 100644
index 0000000..720a0d4
--- /dev/null
+++ b/src/indexer/pipeline/python_modules.rs
@@ -0,0 +1,73 @@
+//! Python module path resolution. `import myapp.utils` and `from myapp.utils
+//! import helper` carry dotted module paths that don't directly map to file
+//! names, so the indexer pre-builds a `dotted_path → file_paths` map and
+//! consults it during Phase 2 import-edge resolution.
+//!
+//! Suffix matching deliberately fans out: `utils` matches every `*/utils.py`
+//! we know about. Over-connecting is the safer failure mode for dependency
+//! analysis without `sys.path` context — a missed dependency is harder to
+//! debug than an extra one.
+
+use std::collections::{HashMap, HashSet};
+
+/// Map dotted Python module paths to the files they may refer to.
+/// Every `.py` path is registered under its full dotted form and under each
+/// trailing suffix, e.g. "src/myapp/utils.py" is reachable as "src.myapp.utils",
+/// "myapp.utils" and "utils". A package `__init__.py` is keyed by its directory.
+pub(super) fn build_python_module_map(python_paths: &HashSet<String>) -> HashMap<String, Vec<String>> {
+    let mut modules: HashMap<String, Vec<String>> = HashMap::new();
+
+    for file in python_paths {
+        // Package form first: "pkg/__init__.py" also ends in ".py". Other extensions are ignored.
+        let module_path = match file.strip_suffix("/__init__.py").or_else(|| file.strip_suffix(".py")) {
+            Some(stem) => stem,
+            None => continue,
+        };
+
+        // Each tail of the segment list is a spelling an import statement may use.
+        let segments: Vec<&str> = module_path.split('/').collect();
+        for start in 0..segments.len() {
+            let dotted = segments[start..].join(".");
+            modules.entry(dotted).or_default().push(file.clone());
+        }
+    }
+
+    // Leave every file list sorted and free of duplicates.
+    modules.values_mut().for_each(|files| {
+        files.sort();
+        files.dedup();
+    });
+    modules
+}
+
+/// Narrow a Python import to the node IDs that live in the files its module path maps to.
+/// `import X` (`is_module_import`) looks for `<module>` nodes in those files;
+/// `from X import Y` looks for nodes named `target_name` in them.
+/// Yields `None` when the module or name is unknown, or no node lies in those files.
+pub(super) fn resolve_python_module_targets(
+    python_module: &str,
+    is_module_import: bool,
+    target_name: &str,
+    python_module_map: &HashMap<String, Vec<String>>,
+    node_id_to_path: &HashMap<i64, String>,
+    name_to_ids: &HashMap<String, Vec<i64>>,
+) -> Option<Vec<i64>> {
+    // Suffix matching means one dotted path can map to several files (`import utils`
+    // may hit both "myapp/utils.py" and "other/utils.py"). Without sys.path context
+    // that ambiguity is kept on purpose: over-connecting beats missing a dependency.
+    let candidate_files = python_module_map.get(python_module)?;
+
+    let wanted = if is_module_import {
+        "<module>"
+    } else {
+        target_name
+    };
+    let mut hits = Vec::new();
+    for id in name_to_ids.get(wanted)? {
+        let in_module = node_id_to_path.get(id).is_some_and(|path| candidate_files.contains(path));
+        if in_module {
+            hits.push(*id);
+        }
+    }
+    (!hits.is_empty()).then_some(hits)
+}
diff --git a/src/indexer/pipeline/resolve.rs b/src/indexer/pipeline/resolve.rs
new file mode 100644
index 0000000..f09cd3d
--- /dev/null
+++ b/src/indexer/pipeline/resolve.rs
@@ -0,0 +1,204 @@
+//! Cross-file call resolution helpers shared by the main `index_files` walk
+//! and the post-index `pending_unresolved_calls` sweep.
+//!
+//! - `refine_ambiguous_targets`: disambiguator — when a call's target name
+//!   matches N same-language nodes across files, prefer non-test paths and
+//!   the longest common path prefix with the caller.
+//! - `resolve_pending_calls`: drains buffered same-language-but-callee-not-yet-
+//!   indexed rows once the callee appears (post-incremental sweep).
+
+use anyhow::Result;
+use std::collections::HashMap;
+
+use crate::storage::db::Database;
+use crate::storage::queries::{
+    delete_pending_unresolved_call, insert_edge_cached, list_pending_unresolved_calls,
+};
+use crate::domain::REL_CALLS;
+
+/// Narrow several same-language, cross-file candidates for one call/import
+/// target down to the most plausible subset.
+///
+/// A one-element result is the authoritative winner. When the heuristics tie,
+/// every tied candidate is returned so legitimate edges are not dropped.
+///
+/// Steps: (1) when the caller is not a test file, discard test-file candidates
+/// unless that would discard all of them; (2) of what remains, keep those whose
+/// path shares the longest byte-wise common prefix with `caller_rel_path`.
+/// A candidate missing from `node_id_to_path` is scored as an empty path.
+///
+/// Earlier versions dropped the edge on ambiguity, which regressed dead-code
+/// detection for bare-name Rust calls like `crate::domain::foo()`: extraction
+/// keeps only `foo`, and two `foo` definitions under `src/` tie on prefix —
+/// better to keep both edges than to report `foo` as dead.
+pub(super) fn refine_ambiguous_targets(
+    candidates: &[i64],
+    caller_rel_path: &str,
+    node_id_to_path: &HashMap<i64, String>,
+) -> Vec<i64> {
+    // True when `path` carries one of the test-file markers or test directories.
+    fn looks_like_test(path: &str) -> bool {
+        const MARKERS: [&str; 5] = [".test.", "_test.", "/tests/", "/test/", ".spec."];
+        path.starts_with("tests/")
+            || path.starts_with("test/")
+            || MARKERS.iter().any(|marker| path.contains(*marker))
+    }
+
+    if candidates.len() < 2 {
+        return candidates.to_vec();
+    }
+
+    let path_of = |id: i64| node_id_to_path.get(&id).map_or("", String::as_str);
+
+    // Step 1: a non-test caller prefers non-test candidates, but falls back to
+    // the full list when every candidate lives in test code.
+    let mut pool = candidates.to_vec();
+    if !looks_like_test(caller_rel_path) {
+        let production: Vec<i64> = candidates.iter().copied()
+            .filter(|&id| !looks_like_test(path_of(id)))
+            .collect();
+        if !production.is_empty() {
+            pool = production;
+        }
+    }
+    if pool.len() == 1 {
+        return pool;
+    }
+
+    // Step 2: score each candidate by the byte-wise prefix its path shares with
+    // the caller — a rough proxy for module locality, e.g.
+    // `claude-plugin/scripts/session-init.js` shares 22 bytes with
+    // `claude-plugin/scripts/lifecycle.js` but 0 bytes with `scripts/*`.
+    let scored: Vec<(i64, usize)> = pool.iter()
+        .map(|&id| {
+            let shared = caller_rel_path.bytes().zip(path_of(id).bytes())
+                .take_while(|(a, b)| a == b)
+                .count();
+            (id, shared)
+        })
+        .collect();
+    let best = scored.iter().map(|&(_, shared)| shared).max().unwrap_or(0);
+
+    // Keep everything tied for the best score: a single survivor is the winner,
+    // several survivors are returned together rather than dropped.
+    scored.into_iter()
+        .filter(|&(_, shared)| shared == best)
+        .map(|(id, _)| id)
+        .collect()
+}
+
+/// Sweep `pending_unresolved_calls` against the current node state. Rows whose
+/// `(target_name, source_language)` now match a real node become a `calls`
+/// edge and the pending row is dropped; rows that still don't resolve stay
+/// buffered for the next index pass.
+///
+/// Resolution priority mirrors Phase 2: same-language candidates only (no
+/// cross-language promotion — that is the canonical false-positive class), with
+/// `refine_ambiguous_targets` applied when multiple candidates share the name.
+///
+/// Returns the number of edges inserted by this sweep.
+///
+/// # Errors
+/// Propagates any database error from the lookups, edge inserts, or row deletes.
+pub(super) fn resolve_pending_calls(db: &Database) -> Result<usize> {
+    // SQLite caps the bound parameters of one statement (999 by default before
+    // 3.32.0), so source IDs are looked up in chunks well under that limit.
+    const MAX_BOUND_PARAMS: usize = 500;
+
+    let pending = list_pending_unresolved_calls(db.conn())?;
+    if pending.is_empty() {
+        return Ok(0);
+    }
+
+    // Build name → [(node_id, language)] map ONCE, then iterate pending rows
+    // in memory. Narrowed by `n.name IN (SELECT DISTINCT target_name ...)` so
+    // even a 1-row pending table doesn't trigger a full nodes-table scan on
+    // every incremental pass — for a 100K-node project the unfiltered SELECT
+    // was 100K rows × every index call, even with no work to do.
+    let mut name_to_lang_targets: HashMap<String, Vec<(i64, String)>> = HashMap::new();
+    let mut node_id_to_path: HashMap<i64, String> = HashMap::new();
+    {
+        let mut stmt = db.conn().prepare(
+            "SELECT n.id, n.name, COALESCE(f.language, ''), f.path
+             FROM nodes n JOIN files f ON f.id = n.file_id
+             WHERE f.language IS NOT NULL
+               AND n.name IN (SELECT DISTINCT target_name FROM pending_unresolved_calls)"
+        )?;
+        let rows = stmt.query_map([], |row| {
+            Ok((
+                row.get::<_, i64>(0)?,
+                row.get::<_, String>(1)?,
+                row.get::<_, String>(2)?,
+                row.get::<_, String>(3)?,
+            ))
+        })?;
+        for row in rows {
+            let (id, name, lang, path) = row?;
+            if lang.is_empty() {
+                continue;
+            }
+            name_to_lang_targets.entry(name).or_default().push((id, lang));
+            node_id_to_path.insert(id, path);
+        }
+    }
+
+    // Map source_id → source file path so refine_ambiguous_targets gets the
+    // proximity hint it needs. IDs are deduplicated first: many pending rows
+    // can originate from the same source node.
+    let mut source_ids: Vec<i64> = pending.iter().map(|p| p.source_id).collect();
+    source_ids.sort_unstable();
+    source_ids.dedup();
+    let mut source_id_to_path: HashMap<i64, String> = HashMap::with_capacity(source_ids.len());
+    for chunk in source_ids.chunks(MAX_BOUND_PARAMS) {
+        let placeholders = std::iter::repeat_n("?", chunk.len()).collect::<Vec<_>>().join(",");
+        let sql = format!(
+            "SELECT n.id, f.path FROM nodes n JOIN files f ON f.id = n.file_id WHERE n.id IN ({})",
+            placeholders
+        );
+        let mut stmt = db.conn().prepare(&sql)?;
+        let params: Vec<&dyn rusqlite::ToSql> = chunk.iter().map(|id| id as &dyn rusqlite::ToSql).collect();
+        let rows = stmt.query_map(params.as_slice(), |row| {
+            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
+        })?;
+        for row in rows {
+            let (id, path) = row?;
+            source_id_to_path.insert(id, path);
+        }
+    }
+
+    let mut edges_added = 0usize;
+    let mut to_delete: Vec<i64> = Vec::new();
+
+    for row in &pending {
+        let candidates: Vec<i64> = name_to_lang_targets.get(&row.target_name)
+            .map(|entries| entries.iter()
+                .filter(|(_, lang)| *lang == row.source_language)
+                .map(|(id, _)| *id)
+                .filter(|id| *id != row.source_id) // self-call guard
+                .collect())
+            .unwrap_or_default();
+
+        if candidates.is_empty() {
+            continue; // still unresolvable — leave buffered
+        }
+
+        let refined = if candidates.len() > 1 {
+            let source_path = source_id_to_path.get(&row.source_id).cloned().unwrap_or_default();
+            refine_ambiguous_targets(&candidates, &source_path, &node_id_to_path)
+        } else {
+            candidates
+        };
+
+        for tgt_id in &refined {
+            if insert_edge_cached(db.conn(), row.source_id, *tgt_id, REL_CALLS, row.metadata.as_deref())? {
+                edges_added += 1;
+            }
+        }
+        to_delete.push(row.id);
+    }
+
+    for id in to_delete {
+        delete_pending_unresolved_call(db.conn(), id)?;
+    }
+    Ok(edges_added)
+}
diff --git a/src/indexer/pipeline/tests.rs b/src/indexer/pipeline/tests.rs
new file mode 100644
index 0000000..0bf3668
--- /dev/null
+++ b/src/indexer/pipeline/tests.rs
@@ -0,0 +1,884 @@
+use super::*;
+use super::python_modules::build_python_module_map;
+use crate::storage::queries::{
+    get_nodes_by_file_path, get_nodes_by_name, get_edges_from, get_import_tree,
+};
+use crate::domain::REL_CALLS;
+use tempfile::TempDir;
+use std::fs;
+
+#[test]
+fn test_full_index_pipeline() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let root = workspace.path();
+    fs::create_dir_all(root.join("src")).unwrap();
+    fs::write(root.join("src/auth.ts"), r#"
+function validateToken(token: string): boolean {
+    return jwt.verify(token);
+}
+
+function handleLogin(req: Request) {
+    if (validateToken(req.token)) {
+        return createSession(req.userId);
+    }
+}
+"#).unwrap();
+
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+
+    assert!(outcome.files_indexed > 0);
+    assert!(outcome.nodes_created > 0);
+    assert!(outcome.edges_created > 0);
+
+    // The login handler must be stored exactly once.
+    let login_nodes = get_nodes_by_name(db.conn(), "handleLogin").unwrap();
+    assert_eq!(login_nodes.len(), 1);
+    let login = &login_nodes[0];
+    // It needs an outgoing `calls` edge (the fixture has handleLogin call validateToken).
+    let outgoing = get_edges_from(db.conn(), login.id).unwrap();
+    assert!(outgoing.iter().any(|edge| edge.relation == REL_CALLS), "should have call edges");
+
+    // The context string is expected to be populated once indexing completes.
+    assert!(login.context_string.is_some(), "context string should be set after Phase 3");
+}
+
+#[test]
+fn test_cross_language_bare_name_call_resolution() {
+    // Regression guard for mixed-language projects: the Rust method call
+    // `hasher.update(...)` used to bind to the JS `function update()` through
+    // a global bare-name lookup, yielding phantom Rust → JS call edges.
+    // Fix: same-file > same-language tiers; edges with no same-language target drop.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let root = workspace.path();
+    for sub in ["src", "scripts"] { fs::create_dir_all(root.join(sub)).unwrap(); }
+
+    fs::write(root.join("src/hasher.rs"), r#"
+pub fn caller_rs() {
+    let mut h = Hasher::new();
+    h.update(&[1, 2, 3]);
+    h.finalize();
+}
+"#).unwrap();
+
+    fs::write(root.join("scripts/helper.js"), r#"
+function update() { return 1; }
+function caller_js() { update(); }
+"#).unwrap();
+
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    run_full_index(&db, root, None, None).unwrap();
+
+    let rs_matches = crate::storage::queries::get_nodes_with_files_by_name(
+        db.conn(), "caller_rs",
+    ).unwrap();
+    let rs_node = rs_matches.iter()
+        .find(|cand| cand.file_path == "src/hasher.rs")
+        .expect("Rust caller_rs should be indexed");
+    let rs_edges = get_edges_from(db.conn(), rs_node.node.id).unwrap();
+    // Only `calls` edges matter; none of their targets may live in a `.js` file.
+    for edge in rs_edges.iter().filter(|edge| edge.relation == REL_CALLS) {
+        let tgt_path: Option<String> = db.conn().query_row(
+            "SELECT f.path FROM nodes n JOIN files f ON n.file_id = f.id WHERE n.id = ?1",
+            [edge.target_id], |row| row.get(0),
+        ).ok();
+        assert!(
+            !tgt_path.as_deref().unwrap_or("").ends_with(".js"),
+            "Rust caller must not resolve calls into JS; got edge → {:?}", tgt_path,
+        );
+    }
+
+    let js_matches = crate::storage::queries::get_nodes_with_files_by_name(
+        db.conn(), "caller_js",
+    ).unwrap();
+    let js_node = js_matches.iter()
+        .find(|cand| cand.file_path == "scripts/helper.js")
+        .expect("JS caller_js should be indexed");
+    let js_edges = get_edges_from(db.conn(), js_node.node.id).unwrap();
+    // Same-file resolution must keep working: caller_js still needs at least
+    // one outgoing `calls` edge.
+    let has_js_call = js_edges.iter()
+        .any(|edge| edge.relation == REL_CALLS);
+    assert!(has_js_call,
+        "JS caller_js → update edge within same file should still resolve");
+}
+
+#[test]
+fn test_js_require_creates_external_import_edges() {
+    let project_dir = TempDir::new().unwrap();
+    let db_dir = TempDir::new().unwrap();
+    fs::write(project_dir.path().join("app.js"), r#"
+const fs = require('fs');
+const path = require('path');
+const lifecycle = require('./lifecycle');
+
+function main() { fs.readFileSync('x'); }
+"#).unwrap();
+
+    let db = Database::open(&db_dir.path().join("index.db")).unwrap();
+    run_full_index(&db, project_dir.path(), None, None).unwrap();
+    // Names of every node that app.js reaches through an `imports` edge.
+    let imports: Vec<String> = db.conn().prepare(
+        "SELECT DISTINCT n2.name FROM edges e
+         JOIN nodes n ON n.id = e.source_id
+         JOIN files f ON f.id = n.file_id
+         JOIN nodes n2 ON n2.id = e.target_id
+         WHERE f.path = 'app.js' AND e.relation = 'imports'"
+    ).unwrap()
+     .query_map([], |row| row.get::<_, String>(0)).unwrap()
+     .collect::<Result<Vec<_>, _>>() // fail loudly on a bad row instead of dropping it
+     .unwrap();
+
+    assert!(imports.contains(&"fs".to_string()),        "imports: {:?}", imports);
+    assert!(imports.contains(&"path".to_string()),      "imports: {:?}", imports);
+    assert!(imports.contains(&"lifecycle".to_string()), "imports: {:?}", imports);
+}
+
+#[test]
+fn test_js_same_name_cross_file_prefers_closest_path() {
+    // Regression guard. When several JS files define a helper with the same
+    // name (e.g. `readJson` in both `claude-plugin/scripts/lifecycle.js` and
+    // `scripts/install-e2e.test.js`), a caller under `claude-plugin/scripts/*`
+    // used to receive an edge to every same-language match, creating
+    // false-positive callers across unrelated modules. The resolver is
+    // expected to keep the candidate sharing the longest path prefix with the
+    // caller file (preferring non-test files) instead of all of them.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let root = workspace.path();
+    for sub in ["pkg/scripts", "tests"] { fs::create_dir_all(root.join(sub)).unwrap(); }
+
+    fs::write(root.join("pkg/scripts/lifecycle.js"), r#"
+function readJson(p) { return 1; }
+module.exports = { readJson };
+"#).unwrap();
+
+    fs::write(root.join("pkg/scripts/session-init.js"), r#"
+function syncLifecycleConfig() { readJson('x'); }
+"#).unwrap();
+
+    fs::write(root.join("tests/helpers.test.js"), r#"
+function readJson(p) { return 2; }
+"#).unwrap();
+
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    run_full_index(&db, root, None, None).unwrap();
+
+    // Locate the calling function in session-init.js.
+    let matches = crate::storage::queries::get_nodes_with_files_by_name(
+        db.conn(), "syncLifecycleConfig",
+    ).unwrap();
+    let caller = matches.iter()
+        .find(|cand| cand.file_path == "pkg/scripts/session-init.js")
+        .expect("syncLifecycleConfig should be indexed");
+
+    // Map each outgoing `calls` edge to the path of the file that holds its
+    // target node. Targets whose path lookup fails are skipped rather than
+    // treated as an error.
+    let outgoing = get_edges_from(db.conn(), caller.node.id).unwrap();
+    let target_paths: Vec<String> = outgoing.iter()
+        .filter(|edge| edge.relation == REL_CALLS)
+        .filter_map(|edge| {
+            db.conn().query_row(
+                "SELECT f.path FROM nodes n JOIN files f ON n.file_id = f.id WHERE n.id = ?1",
+                [edge.target_id], |row| row.get(0)
+            ).ok()
+        })
+        .collect();
+
+    // Same-dir definition must be a target; the test-file one must not be.
+    assert!(
+        target_paths.iter().any(|path| path == "pkg/scripts/lifecycle.js"),
+        "should resolve to same-dir readJson; got {:?}", target_paths
+    );
+    assert!(
+        target_paths.iter().all(|path| path != "tests/helpers.test.js"),
+        "should NOT fan out to unrelated test-file readJson; got {:?}", target_paths
+    );
+}
+
+#[test]
+fn test_js_module_level_test_callback_calls_resolve() {
+    // Regression guard. Helpers that live in a JS test file and are invoked
+    // only inside `test(() => {...})` / `describe(() => {...})` callbacks
+    // were flagged as orphans by dead-code: the anonymous arrow body
+    // attributed its calls to `<anonymous>`, which matches no node.
+    // Module-level call_expressions in JS test files must be attributed to
+    // `<module>` so that a same-file edge is recorded.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+
+    fs::write(workspace.path().join("helpers.test.js"), r#"
+function mkHome() { return '/tmp/x'; }
+function writeJson(p, v) { }
+
+test('uses helpers', () => {
+    const h = mkHome();
+    writeJson(h, { a: 1 });
+});
+"#).unwrap();
+
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+
+    // Every helper needs one or more inbound `calls` edges.
+    for helper in ["mkHome", "writeJson"] {
+        let inbound: i64 = db.conn().query_row(
+            "SELECT COUNT(*) FROM edges e
+             JOIN nodes tn ON tn.id = e.target_id
+             JOIN files tf ON tf.id = tn.file_id
+             WHERE tn.name = ?1 AND tf.path = 'helpers.test.js' AND e.relation = 'calls'",
+            [helper], |row| row.get(0),
+        ).unwrap();
+        assert!(inbound >= 1,
+            "{} should have at least one incoming call edge from the test callback, got {}",
+            helper, inbound);
+    }
+}
+
+#[test]
+fn test_incremental_index() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let source = workspace.path().join("a.ts");
+    // Baseline: a single file defining `foo`.
+    fs::write(&source, "function foo() {}").unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+
+    // Rewrite the file so that it defines `bar` instead.
+    fs::write(&source, "function bar() {}").unwrap();
+
+    // The incremental pass should reprocess exactly that one file.
+    let outcome = run_incremental_index(&db, workspace.path(), None, None).unwrap();
+    assert_eq!(outcome.files_indexed, 1);
+
+    // After the pass no node named `foo` may remain, and exactly one node
+    // named `bar` must exist.
+    assert_eq!(get_nodes_by_name(db.conn(), "foo").unwrap().len(), 0);
+    assert_eq!(get_nodes_by_name(db.conn(), "bar").unwrap().len(), 1);
+}
+
+#[test]
+fn test_incremental_propagates_dirty_context() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    // Baseline: `beta` (b.ts) calls `alpha` (a.ts).
+    fs::write(root.join("a.ts"), "function alpha() {}").unwrap();
+    fs::write(root.join("b.ts"), "function beta() { alpha(); }").unwrap();
+    run_full_index(&db, root, None, None).unwrap();
+
+    let before = get_nodes_by_name(db.conn(), "beta").unwrap();
+    assert_eq!(before.len(), 1);
+    let ctx_before = before[0].context_string.clone().unwrap_or_default();
+
+    // Rename the callee in a.ts (alpha -> alphaRenamed) and re-index.
+    fs::write(root.join("a.ts"), "function alphaRenamed() {}").unwrap();
+    run_incremental_index(&db, root, None, None).unwrap();
+
+    // Although b.ts itself is untouched, beta's context string is expected
+    // to differ now that its callee no longer exists under the old name.
+    let after = get_nodes_by_name(db.conn(), "beta").unwrap();
+    assert_eq!(after.len(), 1);
+    let ctx_after = after[0].context_string.clone().unwrap_or_default();
+    assert_ne!(ctx_before, ctx_after);
+}
+
+#[test]
+fn test_deleted_file_cleanup() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let source = workspace.path().join("a.ts");
+    // Index one file, then remove it from disk.
+    fs::write(&source, "function foo() {}").unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+    fs::remove_file(&source).unwrap();
+    // The incremental pass must drop the nodes of the vanished file.
+    run_incremental_index(&db, workspace.path(), None, None).unwrap();
+    let leftovers = get_nodes_by_name(db.conn(), "foo").unwrap();
+    assert_eq!(leftovers.len(), 0);
+}
+
+#[test]
+fn test_build_python_module_map() {
+    let mut paths = HashSet::new();
+    for file in ["myapp/utils.py", "myapp/__init__.py", "src/myapp/models.py"] {
+        paths.insert(file.into());
+    }
+
+    let map = build_python_module_map(&paths);
+    // True when `module` is a key whose entry lists `file`; panics on a missing key.
+    let maps_to = |module: &str, file: &str| {
+        map.get(module).unwrap().contains(&file.to_string())
+    };
+
+    assert!(maps_to("myapp.utils", "myapp/utils.py")); // full dotted path
+    assert!(maps_to("utils", "myapp/utils.py")); // bare suffix
+    assert!(maps_to("myapp", "myapp/__init__.py")); // __init__.py stands for the package
+    assert!(maps_to("myapp.models", "src/myapp/models.py")); // nested below src/
+}
+
+#[test]
+fn test_python_from_import_resolution() {
+    // `from myapp.utils import helper` must yield a cross-file dependency.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    fs::create_dir_all(root.join("myapp")).unwrap();
+    fs::write(
+        root.join("myapp/utils.py"),
+        "def helper():\n    return 42\n",
+    ).unwrap();
+    fs::write(
+        root.join("myapp/main.py"),
+        "from myapp.utils import helper\n\ndef main():\n    helper()\n",
+    ).unwrap();
+
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+    assert!(outcome.edges_created > 0, "should create import edges");
+
+    // utils.py has to show up in main.py's outgoing import tree (depth 1).
+    let deps = get_import_tree(db.conn(), "myapp/main.py", "outgoing", 1).unwrap();
+    assert!(
+        deps.iter().any(|dep| dep.file_path == "myapp/utils.py"),
+        "main.py should depend on utils.py, got: {:?}",
+        deps.iter().map(|dep| &dep.file_path).collect::<Vec<_>>()
+    );
+}
+
+#[test]
+fn test_python_import_module_resolution() {
+    // A plain `import myutils` must yield a cross-file dependency.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    fs::write(
+        root.join("myutils.py"),
+        "def do_something():\n    pass\n",
+    ).unwrap();
+    fs::write(
+        root.join("main.py"),
+        "import myutils\n\ndef main():\n    myutils.do_something()\n",
+    ).unwrap();
+
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+    assert!(outcome.edges_created > 0, "should create import edges");
+
+    // myutils.py has to show up in main.py's outgoing import tree (depth 1).
+    let deps = get_import_tree(db.conn(), "main.py", "outgoing", 1).unwrap();
+    assert!(
+        deps.iter().any(|dep| dep.file_path == "myutils.py"),
+        "main.py should depend on myutils.py, got: {:?}",
+        deps.iter().map(|dep| &dep.file_path).collect::<Vec<_>>()
+    );
+}
+
+#[test]
+fn test_python_external_import_creates_virtual_nodes() {
+    // Imports of modules absent from the project must produce virtual nodes under `<external>`.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    fs::write(
+        root.join("app.py"),
+        "import os\nfrom collections import OrderedDict\nfrom flask import Flask\n\ndef main():\n    pass\n",
+    ).unwrap();
+
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+    assert!(outcome.files_indexed > 0, "should index the file");
+
+    // The `<external>` pseudo-file must hold one node per imported top-level module.
+    let ext_nodes = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
+    let ext_names: Vec<&str> = ext_nodes.iter().map(|node| node.name.as_str()).collect();
+    assert!(ext_names.contains(&"os"), "should have virtual node for 'os', got: {:?}", ext_names);
+    assert!(ext_names.contains(&"collections"), "should have virtual node for 'collections', got: {:?}", ext_names);
+    assert!(ext_names.contains(&"flask"), "should have virtual node for 'flask', got: {:?}", ext_names);
+
+    // `<external>` must also appear in app.py's outgoing import tree.
+    let deps = get_import_tree(db.conn(), "app.py", "outgoing", 1).unwrap();
+    assert!(
+        deps.iter().any(|dep| dep.file_path == "<external>"),
+        "app.py should show <external> dependency, got: {:?}",
+        deps.iter().map(|dep| &dep.file_path).collect::<Vec<_>>()
+    );
+}
+
+#[test]
+fn test_python_mixed_internal_external_imports() {
+    // One file importing both a project module and third-party/stdlib modules.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    fs::create_dir_all(root.join("myapp")).unwrap();
+    fs::write(
+        root.join("myapp/utils.py"),
+        "def helper():\n    return 42\n",
+    ).unwrap();
+    fs::write(
+        root.join("myapp/main.py"),
+        "import os\nfrom myapp.utils import helper\nfrom flask import Flask\n\ndef main():\n    helper()\n",
+    ).unwrap();
+
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+    assert!(outcome.edges_created > 0);
+
+    // The in-project module must be listed as a dependency of main.py ...
+    let deps = get_import_tree(db.conn(), "myapp/main.py", "outgoing", 1).unwrap();
+    let dep_files: Vec<&str> = deps.iter().map(|dep| dep.file_path.as_str()).collect();
+    assert!(dep_files.contains(&"myapp/utils.py"), "should depend on internal utils.py, got: {:?}", dep_files);
+
+    // ... and so must the `<external>` pseudo-file.
+    assert!(dep_files.contains(&"<external>"), "should depend on <external>, got: {:?}", dep_files);
+}
+
+#[test]
+fn test_index_stats_skipped_large_file() {
+    // `IndexResult.stats` must count files that are skipped because of their size.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let root = workspace.path();
+    // An ordinary file that is expected to be indexed.
+    fs::write(root.join("small.ts"), "function ok() {}").unwrap();
+
+    // 11 MiB of `a` bytes — above the 10MB MAX_FILE_SIZE limit.
+    let oversized = vec![b'a'; 11 * 1024 * 1024];
+    fs::write(root.join("huge.ts"), &oversized).unwrap();
+
+    let outcome = run_full_index(&db, root, None, None).unwrap();
+    assert_eq!(outcome.files_indexed, 1, "should index the small file");
+    assert_eq!(outcome.stats.files_skipped_size, 1, "should track the large file skip");
+}
+
+#[test]
+fn test_index_stats_skipped_parse_error() {
+    // Baseline for the parse/read/hash skip counters: indexing a project that
+    // holds a single valid file must leave all three at zero. No parse failure
+    // is provoked here, so this only guards against spurious skip counts.
+    let project_dir = TempDir::new().unwrap();
+    let db_dir = TempDir::new().unwrap();
+    let db = Database::open(&db_dir.path().join("index.db")).unwrap();
+
+    fs::write(project_dir.path().join("good.ts"), "function ok() {}").unwrap();
+
+    let result = run_full_index(&db, project_dir.path(), None, None).unwrap();
+    // Zeroed counters only mean something if the file was actually processed.
+    assert_eq!(result.files_indexed, 1, "should index the valid file");
+    assert_eq!(result.stats.files_skipped_parse, 0, "valid file must not count as a parse skip");
+    assert_eq!(result.stats.files_skipped_read, 0, "valid file must not count as a read skip");
+    assert_eq!(result.stats.files_skipped_hash, 0, "valid file must not count as a hash skip");
+}
+
+#[test]
+fn test_index_stats_default() {
+    // `IndexStats::default()` must zero the five skip counters checked below.
+    let fresh = IndexStats::default();
+    assert_eq!(fresh.files_skipped_size, 0);
+    assert_eq!(fresh.files_skipped_parse, 0);
+    assert_eq!(fresh.files_skipped_read, 0);
+    assert_eq!(fresh.files_skipped_hash, 0);
+    assert_eq!(fresh.files_skipped_language, 0);
+}
+
+#[test]
+fn test_python_external_survives_incremental_index() {
+    // The `<external>` pseudo-file has to outlive incremental re-indexing.
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let app = workspace.path().join("app.py");
+    fs::write(
+        &app,
+        "import os\n\ndef main():\n    pass\n",
+    ).unwrap();
+
+    // A full index is expected to create `<external>` nodes for the `os` import.
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+    let ext_initial = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
+    assert!(!ext_initial.is_empty(), "should have external nodes after full index");
+
+    // Change only the function body; the import line stays as it was.
+    fs::write(
+        &app,
+        "import os\n\ndef main():\n    return 1\n",
+    ).unwrap();
+
+    // After the incremental pass `<external>` must still hold nodes.
+    run_incremental_index(&db, workspace.path(), None, None).unwrap();
+    let ext_later = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
+    assert!(!ext_later.is_empty(), "external nodes should survive incremental index");
+
+    // The dependency of app.py on `<external>` must remain visible too.
+    let deps = get_import_tree(db.conn(), "app.py", "outgoing", 1).unwrap();
+    assert!(
+        deps.iter().any(|dep| dep.file_path == "<external>"),
+        "app.py should still show <external> dependency after incremental index"
+    );
+}
+
+#[test]
+fn test_repair_null_context_strings() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+
+    // Index two functions so that both nodes receive a context string.
+    fs::write(workspace.path().join("a.ts"), r#"
+function alpha() { return 1; }
+function beta() { alpha(); }
+"#).unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+
+    // Precondition: both context strings are present right after indexing.
+    let alpha_indexed = get_nodes_by_name(db.conn(), "alpha").unwrap();
+    assert_eq!(alpha_indexed.len(), 1);
+    assert!(alpha_indexed[0].context_string.is_some(), "alpha should have context_string after index");
+
+    let beta_indexed = get_nodes_by_name(db.conn(), "beta").unwrap();
+    assert_eq!(beta_indexed.len(), 1);
+    assert!(beta_indexed[0].context_string.is_some(), "beta should have context_string after index");
+
+    // Emulate a failed Phase 3 by wiping every context string.
+    db.conn().execute("UPDATE nodes SET context_string = NULL", []).unwrap();
+
+    // Confirm the wipe is visible through the query layer.
+    let alpha_wiped = get_nodes_by_name(db.conn(), "alpha").unwrap();
+    assert!(alpha_wiped[0].context_string.is_none(), "alpha context_string should be NULL after simulated failure");
+
+    // The repair has to report a non-zero number of fixed nodes.
+    let repaired_count = repair_null_context_strings(&db, None).unwrap();
+    assert!(repaired_count > 0, "should repair at least 1 node");
+
+    // Both nodes must carry a context string again.
+    let alpha_fixed = get_nodes_by_name(db.conn(), "alpha").unwrap();
+    assert!(alpha_fixed[0].context_string.is_some(), "alpha should have context_string after repair");
+
+    let beta_fixed = get_nodes_by_name(db.conn(), "beta").unwrap();
+    assert!(beta_fixed[0].context_string.is_some(), "beta should have context_string after repair");
+}
+
+#[test]
+fn test_rust_implements_creates_sentinel_for_external_trait() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+
+    fs::write(workspace.path().join("main.rs"), r#"
+use std::io::{self, Write};
+use std::fmt;
+
+struct MyWriter;
+
+impl Write for MyWriter {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> { Ok(buf.len()) }
+    fn flush(&mut self) -> io::Result<()> { Ok(()) }
+}
+
+impl fmt::Display for MyWriter {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "MyWriter")
+    }
+}
+"#).unwrap();
+
+    let outcome = run_full_index(&db, workspace.path(), None, None).unwrap();
+    assert!(outcome.files_indexed > 0);
+
+    // Traits that are implemented but not defined in the project need sentinel nodes.
+    let ext_nodes = get_nodes_by_file_path(db.conn(), "<external>").unwrap();
+    let ext_names: Vec<&str> = ext_nodes.iter().map(|node| node.name.as_str()).collect();
+    assert!(ext_names.contains(&"Write"), "should have sentinel for Write, got: {:?}", ext_names);
+    // The `fmt::` path prefix is kept in the sentinel name (as parsed by tree-sitter).
+    assert!(ext_names.contains(&"fmt::Display"), "should have sentinel for fmt::Display, got: {:?}", ext_names);
+
+    // The sentinel has to be recorded with node type "trait".
+    let write_sentinel = ext_nodes.iter().find(|node| node.name == "Write").unwrap();
+    assert_eq!(write_sentinel.node_type, "trait", "sentinel should be type 'trait'");
+
+    // Collect all `implements` edges as (source name, target name) pairs.
+    let implements: Vec<(String, String)> = db.conn().prepare(
+        "SELECT ns.name, nt.name FROM edges e
+         JOIN nodes ns ON ns.id = e.source_id
+         JOIN nodes nt ON nt.id = e.target_id
+         WHERE e.relation = 'implements'"
+    ).unwrap()
+    .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
+    .unwrap()
+    .collect::<Result<Vec<_>, _>>().unwrap();
+
+    assert!(implements.iter().any(|(src, dst)| src == "MyWriter" && dst == "Write"),
+        "should have MyWriter→Write implements edge, got: {:?}", implements);
+    assert!(implements.iter().any(|(src, dst)| src == "MyWriter" && dst == "fmt::Display"),
+        "should have MyWriter→fmt::Display implements edge, got: {:?}", implements);
+}
+
+/// Contract of `ensure_file_indexed`: (a) nothing happens while the on-disk
+/// hash equals the stored one, and (b) a differing hash makes it pick up the
+/// edited content. The MCP `ensure_file_fresh_opt` wrapper depends on this to
+/// close the post-Edit→pre-incremental-index window.
+#[test]
+fn test_ensure_file_indexed_picks_up_post_edit_changes() {
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+    let source = workspace.path().join("a.ts");
+    // Starting point: the file defines `alpha` and has been fully indexed.
+    fs::write(&source, "function alpha() {}\n").unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+    let names_before: Vec<String> = get_nodes_by_name(db.conn(), "alpha")
+        .unwrap().into_iter().map(|node| node.name).collect();
+    assert_eq!(names_before, vec!["alpha".to_string()]);
+
+    // Unchanged content: the call must report that it did nothing.
+    let unchanged = ensure_file_indexed(&db, workspace.path(), "a.ts", None).unwrap();
+    assert!(!unchanged, "matching hash must be a no-op (got reindex)");
+
+    // Replace `alpha` with `beta` on disk; the call must now reindex.
+    fs::write(&source, "function beta() {}\n").unwrap();
+    let after_edit = ensure_file_indexed(&db, workspace.path(), "a.ts", None).unwrap();
+    assert!(after_edit, "hash mismatch must trigger a reindex");
+
+    // The index now reflects the edit: no `alpha`, exactly one `beta`.
+    assert!(get_nodes_by_name(db.conn(), "alpha").unwrap().is_empty(),
+        "old alpha must be evicted by single-file reindex");
+    let beta_nodes = get_nodes_by_name(db.conn(), "beta").unwrap();
+    assert_eq!(beta_nodes.len(), 1, "new beta must appear after single-file reindex");
+    assert_eq!(beta_nodes[0].name, "beta");
+
+    // A repeated call without a further edit must again do nothing.
+    let repeated = ensure_file_indexed(&db, workspace.path(), "a.ts", None).unwrap();
+    assert!(!repeated, "second call with no edit must no-op");
+
+    // Once the file is removed from disk, its rows must be cleaned up.
+    fs::remove_file(&source).unwrap();
+    let after_delete = ensure_file_indexed(&db, workspace.path(), "a.ts", None).unwrap();
+    assert!(after_delete, "missing file must trigger row cleanup");
+    assert!(get_nodes_by_name(db.conn(), "beta").unwrap().is_empty(),
+        "beta must be cascade-deleted with its file");
+}
+
+/// Root-cause test for `feedback_incremental_edge_timing.md`. File B already
+/// exists, is never modified, and makes a bare-name call to `foo()`. File A,
+/// containing `function foo() {}`, is only added afterwards. During B's first
+/// index pass Phase 2 dropped the edge because `foo` could not be resolved;
+/// before the fix, indexing A later never revisited B's call, so the edge
+/// stayed missing in incremental mode (only `rebuild-index` recovered it).
+///
+/// New behavior: the dropped call is stored as a `pending_unresolved_calls`
+/// row, and A's index pass sweeps those rows and turns it into a real edge.
+#[test]
+fn test_pending_unresolved_call_resolves_when_callee_added_later() {
+    use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
+
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+
+    // Step 1: only B exists; it calls `foo` by bare name, and `foo` is undefined.
+    fs::write(workspace.path().join("b.ts"),
+        "function caller_b() { foo(); }\n").unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+
+    // With no same-file/same-language target available, Phase 2 cannot
+    // create the edge and must buffer the call as a pending row.
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
+        "B's call to undefined foo must land in pending_unresolved_calls");
+
+    let caller_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
+        .into_iter().next().expect("caller_b must exist").0;
+
+    // Nothing to point at yet, so caller_b must have no `calls` edge.
+    let edges_before = get_edges_from(db.conn(), caller_id).unwrap();
+    assert!(!edges_before.iter().any(|edge| edge.relation == REL_CALLS),
+        "no calls edge should exist yet — foo is undefined");
+
+    // Step 2: add A, which defines `foo`. The incremental index picks the new
+    // file up, and the pending sweep at the end of index_files is expected
+    // to promote B's buffered call into a real edge.
+    fs::write(workspace.path().join("a.ts"),
+        "export function foo() {}\n").unwrap();
+    run_incremental_index(&db, workspace.path(), None, None).unwrap();
+
+    let foo_id = get_node_ids_by_name(db.conn(), "foo").unwrap()
+        .into_iter().next().expect("foo must exist after A indexed").0;
+
+    let edges_after = get_edges_from(db.conn(), caller_id).unwrap();
+    let promoted = edges_after.iter()
+        .filter(|edge| edge.relation == REL_CALLS && edge.target_id == foo_id)
+        .count();
+    assert_eq!(promoted, 1,
+        "incremental index must promote pending call → calls edge caller_b → foo; \
+         got edges: {:?}", edges_after.iter().map(|e| (&e.relation, e.target_id)).collect::<Vec<_>>());
+
+    // A resolved call must not linger in the pending table.
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
+        "resolved pending row must be deleted after edge insertion");
+}
+
+/// A pending call must never be resolved against another language. When B
+/// (TS) calls `update()` and a Rust file defining `fn update()` is indexed
+/// later, the pending row has to stay buffered instead of silently binding
+/// across languages (the canonical false-positive class described in memory
+/// `feedback_edge_resolution_same_language.md`).
+#[test]
+fn test_pending_unresolved_call_does_not_cross_language() {
+    use crate::storage::queries::count_pending_unresolved_calls;
+
+    let workspace = TempDir::new().unwrap();
+    let storage = TempDir::new().unwrap();
+    let db = Database::open(&storage.path().join("index.db")).unwrap();
+
+    // A TS caller with an unresolved bare-name call to `update`.
+    fs::write(workspace.path().join("client.ts"),
+        "function caller_ts() { update(); }\n").unwrap();
+    run_full_index(&db, workspace.path(), None, None).unwrap();
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1);
+
+    // A Rust definition of `update` arrives later; the language differs.
+    fs::write(workspace.path().join("hasher.rs"),
+        "fn update() {}\n").unwrap();
+    run_incremental_index(&db, workspace.path(), None, None).unwrap();
+
+    // The sweep must leave the pending row in place.
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
+        "cross-language target must NOT resolve a TS pending call to a Rust fn");
+}
+
+/// One caller with N undefined references must produce N pending rows;
+/// when a single later-added file defines all N, all rows must resolve in
+/// a single sweep. Real codebases hit this whenever a "barrel" or shared
+/// utility module gets added after its consumers.
+#[test]
+fn test_pending_resolves_multiple_calls_in_same_caller() {
+    use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
+
+    let project_dir = TempDir::new().unwrap();
+    let db_dir = TempDir::new().unwrap();
+    let db = Database::open(&db_dir.path().join("index.db")).unwrap();
+
+    // B has three undefined call targets — foo, bar, baz.
+    fs::write(project_dir.path().join("b.ts"),
+        "function caller_b() { foo(); bar(); baz(); }\n").unwrap();
+    run_full_index(&db, project_dir.path(), None, None).unwrap();
+
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 3,
+        "three bare-name calls must produce three pending rows");
+
+    // A defines all three.
+    fs::write(project_dir.path().join("a.ts"),
+        "export function foo() {}\nexport function bar() {}\nexport function baz() {}\n").unwrap();
+    run_incremental_index(&db, project_dir.path(), None, None).unwrap();
+
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
+        "all three pending rows must drain once their targets exist");
+
+    // All three resolved into real edges.
+    let caller_b_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
+        .into_iter().next().unwrap().0;
+    let edges = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
+    let calls_count = edges.iter().filter(|e| e.relation == REL_CALLS).count();
+    assert_eq!(calls_count, 3,
+        "caller_b must have exactly three calls edges (foo, bar, baz); got {} edges total: {:?}",
+        calls_count, edges.iter().map(|e| (&e.relation, e.target_id)).collect::<Vec<_>>());
+}
+
+/// Reindexing the caller's own file (e.g. the user edits B) must remove B's
+/// stale pending rows: the cascade FK on pending_unresolved_calls(source_id)
+/// drops them so a fresh Phase 2 can re-buffer with the current source IDs.
+/// This self-cleaning property is load-bearing, so it is tested explicitly:
+/// a future migration that drops or weakens the FK should fail loudly here
+/// instead of leaking pending rows whose callers no longer exist.
+#[test]
+fn test_pending_cascade_deletes_when_caller_file_reindexed() {
+    use crate::storage::queries::count_pending_unresolved_calls;
+
+    let project_dir = TempDir::new().unwrap();
+    let db_dir = TempDir::new().unwrap();
+    let db = Database::open(&db_dir.path().join("index.db")).unwrap();
+
+    // B calls a target that nothing defines → exactly one pending row is created.
+    fs::write(project_dir.path().join("b.ts"),
+        "function caller_b() { undefined_target(); }\n").unwrap();
+    run_full_index(&db, project_dir.path(), None, None).unwrap();
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1);
+
+    // Rewrite B so the call is gone. Reindexing deletes caller_b's old node
+    // (Phase 1 clears the file's prior rows), and the pending row keyed on
+    // that node must disappear with it via ON DELETE CASCADE on source_id.
+    fs::write(project_dir.path().join("b.ts"),
+        "function caller_b() { /* call removed */ }\n").unwrap();
+    run_incremental_index(&db, project_dir.path(), None, None).unwrap();
+
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
+        "pending row must be cascade-deleted when its source caller is removed/reindexed");
+}
+
+/// Inverse-direction counterpart for `feedback_incremental_edge_timing.md`:
+/// deleting A cascade-deletes the existing edge B → A.foo, yet B is NOT in
+/// changed_paths (a deletion does not re-extract B). Without Phase 0's
+/// pre-cascade buffering B would keep neither the edge nor a pending row —
+/// a silent, permanent edge loss until a full rebuild. The Phase 0 buffer
+/// (added by this fix) must record B's call as pending before cascade fires.
+#[test]
+fn test_pending_buffers_on_callee_file_deletion() {
+    use crate::storage::queries::{count_pending_unresolved_calls, get_node_ids_by_name};
+
+    let project_dir = TempDir::new().unwrap();
+    let db_dir = TempDir::new().unwrap();
+    let db = Database::open(&db_dir.path().join("index.db")).unwrap();
+
+    // Initial state: A defines foo and B calls it, so the edge caller_b → foo is expected.
+    fs::write(project_dir.path().join("a.ts"),
+        "export function foo() {}\n").unwrap();
+    fs::write(project_dir.path().join("b.ts"),
+        "function caller_b() { foo(); }\n").unwrap();
+    run_full_index(&db, project_dir.path(), None, None).unwrap();
+
+    // Nothing is pending yet — the call was resolved during the full index.
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
+        "fully-resolvable call must not produce a pending row");
+
+    let caller_b_id = get_node_ids_by_name(db.conn(), "caller_b").unwrap()
+        .into_iter().next().unwrap().0;
+    let foo_id_pre = get_node_ids_by_name(db.conn(), "foo").unwrap()
+        .into_iter().next().unwrap().0;
+    let edges_pre = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
+    assert!(edges_pre.iter().any(|e| e.relation == REL_CALLS && e.target_id == foo_id_pre),
+        "edge caller_b → foo must exist pre-deletion");
+
+    // Delete A from disk and reindex. Phase 0 must buffer B's now-orphaned
+    // call into pending BEFORE the cascade strips the edge.
+    fs::remove_file(project_dir.path().join("a.ts")).unwrap();
+    run_incremental_index(&db, project_dir.path(), None, None).unwrap();
+
+    // No node named foo may remain once a.ts is gone.
+    assert!(get_node_ids_by_name(db.conn(), "foo").unwrap().is_empty(),
+        "foo must be cascade-deleted with file a.ts");
+
+    // The edge to the old foo is gone, but exactly one pending row now holds B's call.
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 1,
+        "Phase 0 must buffer the orphaned inbound call into pending");
+
+    // Re-add A — the sweep must promote the buffered call to a fresh edge from the same caller_b id.
+    fs::write(project_dir.path().join("a.ts"),
+        "export function foo() {}\n").unwrap();
+    run_incremental_index(&db, project_dir.path(), None, None).unwrap();
+
+    assert_eq!(count_pending_unresolved_calls(db.conn()).unwrap(), 0,
+        "pending must drain once foo reappears");
+
+    let foo_id_post = get_node_ids_by_name(db.conn(), "foo").unwrap()
+        .into_iter().next().unwrap().0;
+    let edges_post = crate::storage::queries::get_edges_from(db.conn(), caller_b_id).unwrap();
+    assert!(edges_post.iter().any(|e| e.relation == REL_CALLS && e.target_id == foo_id_post),
+        "edge caller_b → foo must reappear post re-add via pending sweep");
+}