Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions code_review_graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,24 +306,43 @@ def get_all_files(self) -> list[str]:
return [r["file_path"] for r in rows]

def search_nodes(self, query: str, limit: int = 20) -> list[GraphNode]:
"""Keyword search across node names with multi-word AND logic.
"""Keyword search across node names.

Each word in the query must match independently (case-insensitive)
against the node name or qualified name. For example,
``"firebase auth"`` matches ``verify_firebase_token`` and
``FirebaseAuth`` but not ``get_user``.
Tries FTS5 first (fast, tokenized matching), then falls back to
LIKE-based substring search when FTS5 returns no results.
"""
words = query.lower().split()
words = query.split()
if not words:
return []

# Phase 1: FTS5 search (uses the indexed nodes_fts table)
try:
if len(words) == 1:
fts_query = '"' + query.replace('"', '""') + '"'
else:
fts_query = " AND ".join(
'"' + w.replace('"', '""') + '"' for w in words
)
rows = self._conn.execute(
"SELECT n.* FROM nodes_fts f "
"JOIN nodes n ON f.rowid = n.id "
"WHERE nodes_fts MATCH ? LIMIT ?",
(fts_query, limit),
).fetchall()
if rows:
return [self._row_to_node(r) for r in rows]
except Exception:
pass # FTS5 table may not exist on older schemas

# Phase 2: LIKE fallback (substring matching)
conditions: list[str] = []
params: list[str | int] = []
for word in words:
w = word.lower()
conditions.append(
"(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)"
)
params.extend([f"%{word}%", f"%{word}%"])
params.extend([f"%{w}%", f"%{w}%"])

where = " AND ".join(conditions)
sql = f"SELECT * FROM nodes WHERE {where} LIMIT ?" # nosec B608
Expand Down
10 changes: 10 additions & 0 deletions code_review_graph/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,15 @@ def _migrate_v5(conn: sqlite3.Connection) -> None:
logger.info("Migration v5: created nodes_fts FTS5 virtual table")


def _migrate_v6(conn: sqlite3.Connection) -> None:
    """v6: Create the ``idx_edges_composite`` index on the ``edges`` table.

    The index covers ``(kind, source_qualified, target_qualified, file_path,
    line)`` and is intended to speed up ``upsert_edge``. The statement uses
    ``IF NOT EXISTS``, so running it against a database that already has the
    index does not fail.

    Args:
        conn: SQLite connection the statement is executed on.
    """
    # Assembled from adjacent literals so each clause sits on its own short
    # source line; the resulting statement text is unchanged.
    create_index_sql = (
        "\n"
        "CREATE INDEX IF NOT EXISTS idx_edges_composite\n"
        "ON edges(kind, source_qualified, target_qualified, file_path, line)\n"
    )
    conn.execute(create_index_sql)
    logger.info("Migration v6: created composite edge index")


# ---------------------------------------------------------------------------
# Migration registry
# ---------------------------------------------------------------------------
Expand All @@ -165,6 +174,7 @@ def _migrate_v5(conn: sqlite3.Connection) -> None:
3: _migrate_v3,
4: _migrate_v4,
5: _migrate_v5,
6: _migrate_v6,
}

# Largest version number that has an entry in the MIGRATIONS registry.
LATEST_VERSION = max(MIGRATIONS)
Expand Down
12 changes: 10 additions & 2 deletions code_review_graph/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,14 @@ def _fts_search(
Returns list of ``(node_id, bm25_score)`` tuples. The BM25 score is
negated so higher = better (FTS5 returns negative BM25).
"""
# Sanitize: wrap in double quotes to prevent FTS5 operator injection
safe_query = '"' + query.replace('"', '""') + '"'
# Split multi-word queries into AND-joined terms so "graph store" matches
# both "GraphStore" and nodes containing both words (not just exact phrase).
# Each term is quoted to prevent FTS5 operator injection.
terms = query.split()
if len(terms) <= 1:
safe_query = '"' + query.replace('"', '""') + '"'
else:
safe_query = " AND ".join('"' + t.replace('"', '""') + '"' for t in terms)

try:
rows = conn.execute(
Expand Down Expand Up @@ -357,6 +363,8 @@ def hybrid_search(
boost *= kind_boosts["_qualified"]
if context_set and file_path in context_set:
boost *= 1.5
if row["is_test"]:
boost *= 0.5

boosted.append((node_id, score * boost))

Expand Down
37 changes: 25 additions & 12 deletions code_review_graph/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,20 @@ def query_graph(
node = candidates[0]
target = node.qualified_name
elif len(candidates) > 1:
return {
"status": "ambiguous",
"summary": (
f"Multiple matches for '{target}'. "
"Please use a qualified name."
),
"candidates": [node_to_dict(c) for c in candidates],
}
# Prefer non-test nodes when exactly one production candidate
non_test = [c for c in candidates if not c.is_test]
if len(non_test) == 1:
node = non_test[0]
target = node.qualified_name
else:
return {
"status": "ambiguous",
"summary": (
f"Multiple matches for '{target}'. "
"Please use a qualified name."
),
"candidates": [node_to_dict(c) for c in candidates],
}

if not node and pattern != "file_summary":
return {
Expand All @@ -192,10 +198,12 @@ def query_graph(
qn = node.qualified_name if node else target

if pattern == "callers_of":
seen_qn: set[str] = set()
for e in store.get_edges_by_target(qn):
if e.kind == "CALLS":
caller = store.get_node(e.source_qualified)
if caller:
if caller and caller.qualified_name not in seen_qn:
seen_qn.add(caller.qualified_name)
results.append(node_to_dict(caller))
edges_out.append(edge_to_dict(e))
# Fallback: CALLS edges store unqualified target names
Expand All @@ -204,15 +212,18 @@ def query_graph(
if not results and node:
for e in store.search_edges_by_target_name(node.name):
caller = store.get_node(e.source_qualified)
if caller:
if caller and caller.qualified_name not in seen_qn:
seen_qn.add(caller.qualified_name)
results.append(node_to_dict(caller))
edges_out.append(edge_to_dict(e))

elif pattern == "callees_of":
seen_qn: set[str] = set()
for e in store.get_edges_by_source(qn):
if e.kind == "CALLS":
callee = store.get_node(e.target_qualified)
if callee:
if callee and callee.qualified_name not in seen_qn:
seen_qn.add(callee.qualified_name)
results.append(node_to_dict(callee))
edges_out.append(edge_to_dict(e))

Expand Down Expand Up @@ -261,10 +272,12 @@ def query_graph(
results.append(node_to_dict(t))

elif pattern == "inheritors_of":
seen_qn: set[str] = set()
for e in store.get_edges_by_target(qn):
if e.kind in ("INHERITS", "IMPLEMENTS"):
child = store.get_node(e.source_qualified)
if child:
if child and child.qualified_name not in seen_qn:
seen_qn.add(child.qualified_name)
results.append(node_to_dict(child))
edges_out.append(edge_to_dict(e))

Expand Down
Loading