diff --git a/CHANGELOG.md b/CHANGELOG.md index f47a176..080ddbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## [2.2.0] - 2026-04-05 + +### Added +- **PreToolUse search enrichment**: New `code-review-graph enrich` CLI subcommand and Claude Code hook. When agents use Grep/Glob/Bash(rg/grep)/Read, results are automatically enriched with callers, callees, execution flows, community membership, and test coverage from the graph. Zero-friction adoption -- agents get structural context passively. +- **Platform-aware instructions**: CLAUDE.md gets lighter instructions (hooks handle exploration), non-hook platforms (.cursorrules, AGENTS.md, etc.) get stronger "prefer graph tools" guidance with full tool table. + +### Changed +- **CLAUDE.md instructions**: Removed "ALWAYS use graph tools BEFORE Grep/Glob/Read" directive -- the enrich hook now handles this passively. Instructions now focus on deep-analysis tools only (detect_changes, impact_radius, etc.), saving ~150 tokens per conversation. + +### Upgrade note +If upgrading from v2.1.0, delete the existing `## MCP Tools: code-review-graph` section from your CLAUDE.md (and .cursorrules, AGENTS.md, etc.) and re-run `code-review-graph install` to get the updated instructions. 
+ +### Fixed +- **Multi-word FTS5 search**: Queries now use AND logic (`"graph" AND "store"`) instead of phrase matching, so "graph store" finds GraphStore +- **Deduplicated query results**: `callers_of`/`callees_of`/`inheritors_of` no longer return duplicate nodes when multiple call-site edges exist +- **Ambiguous query auto-resolution**: Bare-name queries with multiple matches now auto-resolve to the production function when exactly one non-test candidate exists +- **Test function deprioritization**: Search results apply 0.5x score penalty to test functions so production code ranks higher +- **Composite edge index**: v6 migration adds composite index on edges for faster `upsert_edge` performance + ## [2.1.0] - 2026-04-03 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index cb82907..89b0581 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -14,10 +14,11 @@ - `incremental.py` — Git-based change detection, file watching - `embeddings.py` — Optional vector embeddings (Local sentence-transformers, Google Gemini, MiniMax) - `visualization.py` — D3.js interactive HTML graph generator - - `cli.py` — CLI entry point (install, build, update, watch, status, visualize, serve, wiki, detect-changes, register, unregister, repos, eval) + - `cli.py` — CLI entry point (install, build, update, watch, status, visualize, serve, wiki, detect-changes, enrich, register, unregister, repos, eval) - `flows.py` — Execution flow detection and criticality scoring - `communities.py` — Community detection (Leiden algorithm or file-based grouping) and architecture overview - `search.py` — FTS5 hybrid search (keyword + vector) + - `enrich.py` — PreToolUse search enrichment (callers, callees, flows, communities for Grep/Glob/Read) - `changes.py` — Risk-scored change impact analysis (detect-changes) - `refactor.py` — Rename preview, dead code detection, refactoring suggestions - `hints.py` — Review hint generation @@ -25,7 +26,7 @@ - `wiki.py` — Markdown wiki generation from community structure - `skills.py` 
— Skill definitions for Claude Code plugin - `registry.py` — Multi-repo registry with connection pool - - `migrations.py` — Database schema migrations (v1-v5) + - `migrations.py` — Database schema migrations (v1-v6) - `tsconfig_resolver.py` — TypeScript path alias resolution - **VS Code Extension**: `code-review-graph-vscode/` (TypeScript) @@ -38,7 +39,7 @@ ```bash # Development -uv run pytest tests/ --tb=short -q # Run tests (572 tests) +uv run pytest tests/ --tb=short -q # Run tests (589 tests) uv run ruff check code_review_graph/ # Lint uv run mypy code_review_graph/ --ignore-missing-imports --no-strict-optional @@ -88,6 +89,7 @@ uv run code-review-graph eval # Run evaluation benchmarks - `tests/test_changes.py` — Risk-scored change analysis - `tests/test_refactor.py` — Rename preview, dead code, suggestions - `tests/test_search.py` — FTS5 hybrid search +- `tests/test_enrich.py` — PreToolUse search enrichment - `tests/test_hints.py` — Review hint generation - `tests/test_prompts.py` — MCP prompt template tests - `tests/test_wiki.py` — Wiki generation diff --git a/code_review_graph/cli.py b/code_review_graph/cli.py index 0bf8e18..ff78850 100644 --- a/code_review_graph/cli.py +++ b/code_review_graph/cli.py @@ -11,6 +11,7 @@ code-review-graph visualize code-review-graph wiki code-review-graph detect-changes [--base BASE] [--brief] + code-review-graph enrich code-review-graph register [--alias name] code-review-graph unregister code-review-graph repos @@ -294,6 +295,9 @@ def main() -> None: ) detect_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)") + # enrich (PreToolUse hook -- reads hook JSON from stdin) + sub.add_parser("enrich", help="Enrich search results with graph context (hook)") + # serve serve_cmd = sub.add_parser("serve", help="Start MCP server (stdio transport)") serve_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)") @@ -313,6 +317,11 @@ def main() -> None: serve_main(repo_root=args.repo) 
return + if args.command == "enrich": + from .enrich import run_hook + run_hook() + return + if args.command == "eval": from .eval.reporter import generate_full_report, generate_readme_tables from .eval.runner import run_eval diff --git a/code_review_graph/enrich.py b/code_review_graph/enrich.py new file mode 100644 index 0000000..f95c334 --- /dev/null +++ b/code_review_graph/enrich.py @@ -0,0 +1,303 @@ +"""PreToolUse search enrichment for Claude Code hooks. + +Intercepts Grep/Glob/Bash/Read tool calls and enriches them with +structural context from the code knowledge graph: callers, callees, +execution flows, community membership, and test coverage. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import sys +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# Flags that consume the next token in grep/rg commands +_RG_FLAGS_WITH_VALUES = frozenset({ + "-e", "-f", "-m", "-A", "-B", "-C", "-g", "--glob", + "-t", "--type", "--include", "--exclude", "--max-count", + "--max-depth", "--max-filesize", "--color", "--colors", + "--context-separator", "--field-match-separator", + "--path-separator", "--replace", "--sort", "--sortr", +}) + + +def extract_pattern(tool_name: str, tool_input: dict[str, Any]) -> str | None: + """Extract a search pattern from a tool call's input. + + Returns None if no meaningful pattern can be extracted. 
+ """ + if tool_name == "Grep": + return tool_input.get("pattern") + + if tool_name == "Glob": + raw = tool_input.get("pattern", "") + # Extract meaningful name from glob: "**/auth*.ts" -> "auth" + # Skip pure extension globs like "**/*.ts" + match = re.search(r"[*/]([a-zA-Z][a-zA-Z0-9_]{2,})", raw) + return match.group(1) if match else None + + if tool_name == "Bash": + cmd = tool_input.get("command", "") + if not re.search(r"\brg\b|\bgrep\b", cmd): + return None + tokens = cmd.split() + found_cmd = False + skip_next = False + for token in tokens: + if skip_next: + skip_next = False + continue + if not found_cmd: + if re.search(r"\brg$|\bgrep$", token): + found_cmd = True + continue + if token.startswith("-"): + if token in _RG_FLAGS_WITH_VALUES: + skip_next = True + continue + cleaned = token.strip("'\"") + return cleaned if len(cleaned) >= 3 else None + return None + + return None + + +def _make_relative(file_path: str, repo_root: str) -> str: + """Make a file path relative to repo_root for display.""" + try: + return str(Path(file_path).relative_to(repo_root)) + except ValueError: + return file_path + + +def _get_community_name(conn: Any, community_id: int) -> str: + """Fetch a community name by ID.""" + row = conn.execute( + "SELECT name FROM communities WHERE id = ?", (community_id,) + ).fetchone() + return row["name"] if row else "" + + +def _get_flow_names_for_node(conn: Any, node_id: int) -> list[str]: + """Fetch execution flow names that a node participates in (max 3).""" + rows = conn.execute( + "SELECT f.name FROM flow_memberships fm " + "JOIN flows f ON fm.flow_id = f.id " + "WHERE fm.node_id = ? 
LIMIT 3", + (node_id,), + ).fetchall() + return [r["name"] for r in rows] + + +def _format_node_context( + node: Any, + store: Any, + conn: Any, + repo_root: str, +) -> list[str]: + """Format a single node's structural context as plain text lines.""" + from .graph import GraphNode + assert isinstance(node, GraphNode) + + qn = node.qualified_name + loc = _make_relative(node.file_path, repo_root) + if node.line_start: + loc = f"{loc}:{node.line_start}" + + header = f"{node.name} ({loc})" + + # Community + if node.extra.get("community_id"): + cname = _get_community_name(conn, node.extra["community_id"]) + if cname: + header += f" [{cname}]" + else: + # Check via direct query + row = conn.execute( + "SELECT community_id FROM nodes WHERE id = ?", (node.id,) + ).fetchone() + if row and row["community_id"]: + cname = _get_community_name(conn, row["community_id"]) + if cname: + header += f" [{cname}]" + + lines = [header] + + # Callers (max 5, deduplicated) + callers: list[str] = [] + seen: set[str] = set() + for e in store.get_edges_by_target(qn): + if e.kind == "CALLS" and len(callers) < 5: + c = store.get_node(e.source_qualified) + if c and c.name not in seen: + seen.add(c.name) + callers.append(c.name) + if callers: + lines.append(f" Called by: {', '.join(callers)}") + + # Callees (max 5, deduplicated) + callees: list[str] = [] + seen.clear() + for e in store.get_edges_by_source(qn): + if e.kind == "CALLS" and len(callees) < 5: + c = store.get_node(e.target_qualified) + if c and c.name not in seen: + seen.add(c.name) + callees.append(c.name) + if callees: + lines.append(f" Calls: {', '.join(callees)}") + + # Execution flows + flow_names = _get_flow_names_for_node(conn, node.id) + if flow_names: + lines.append(f" Flows: {', '.join(flow_names)}") + + # Tests + tests: list[str] = [] + for e in store.get_edges_by_target(qn): + if e.kind == "TESTED_BY" and len(tests) < 3: + t = store.get_node(e.source_qualified) + if t: + tests.append(t.name) + if tests: + lines.append(f" 
Tests: {', '.join(tests)}") + + return lines + + +def enrich_search(pattern: str, repo_root: str) -> str: + """Search the graph for pattern and return enriched context.""" + from .graph import GraphStore + from .search import _fts_search + + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return "" + + store = GraphStore(db_path) + try: + conn = store._conn + + fts_results = _fts_search(conn, pattern, limit=8) + if not fts_results: + return "" + + all_lines: list[str] = [] + count = 0 + for node_id, _score in fts_results: + if count >= 5: + break + node = store.get_node_by_id(node_id) + if not node or node.is_test: + continue + node_lines = _format_node_context(node, store, conn, repo_root) + all_lines.extend(node_lines) + all_lines.append("") + count += 1 + + if not all_lines: + return "" + + header = f'[code-review-graph] {count} symbol(s) matching "{pattern}":\n' + return header + "\n".join(all_lines) + finally: + store.close() + + +def enrich_file_read(file_path: str, repo_root: str) -> str: + """Enrich a file read with structural context for functions in that file.""" + from .graph import GraphStore + + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return "" + + store = GraphStore(db_path) + try: + conn = store._conn + nodes = store.get_nodes_by_file(file_path) + if not nodes: + # Try with resolved path + try: + resolved = str(Path(file_path).resolve()) + nodes = store.get_nodes_by_file(resolved) + except (OSError, ValueError): + pass + if not nodes: + return "" + + # Filter to functions/classes/types (skip File nodes), limit to 10 + interesting = [ + n for n in nodes + if n.kind in ("Function", "Class", "Type", "Test") + ][:10] + + if not interesting: + return "" + + all_lines: list[str] = [] + for node in interesting: + node_lines = _format_node_context(node, store, conn, repo_root) + all_lines.extend(node_lines) + all_lines.append("") + + rel_path = 
_make_relative(file_path, repo_root) + header = ( + f"[code-review-graph] {len(interesting)} symbol(s) in {rel_path}:\n" + ) + return header + "\n".join(all_lines) + finally: + store.close() + + +def run_hook() -> None: + """Entry point for the enrich CLI subcommand. + + Reads Claude Code hook JSON from stdin, extracts the search pattern, + queries the graph, and outputs hookSpecificOutput JSON to stdout. + """ + try: + hook_input = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + return + + tool_name = hook_input.get("tool_name", "") + tool_input = hook_input.get("tool_input", {}) + cwd = hook_input.get("cwd", os.getcwd()) + + # Find repo root by walking up from cwd + from .incremental import find_project_root + + repo_root = str(find_project_root(Path(cwd))) + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return + + # Dispatch + context = "" + if tool_name == "Read": + fp = tool_input.get("file_path", "") + if fp: + context = enrich_file_read(fp, repo_root) + else: + pattern = extract_pattern(tool_name, tool_input) + if not pattern or len(pattern) < 3: + return + context = enrich_search(pattern, repo_root) + + if not context: + return + + response = { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": context, + } + } + json.dump(response, sys.stdout) diff --git a/code_review_graph/skills.py b/code_review_graph/skills.py index 1920709..04c738b 100644 --- a/code_review_graph/skills.py +++ b/code_review_graph/skills.py @@ -304,15 +304,21 @@ def generate_skills(repo_root: Path, skills_dir: Path | None = None) -> Path: def generate_hooks_config() -> dict[str, Any]: - """Generate Claude Code hooks configuration. + """Generate Claude Code settings configuration. - Returns a hooks config dict with PostToolUse, SessionStart, and - PreCommit hooks for automatic graph updates. 
+ Returns a settings dict with permissions (auto-allow MCP tools) and + hooks (PostToolUse, SessionStart, PreToolUse) for automatic graph + updates and search enrichment. Returns: - Dict with hooks configuration suitable for .claude/settings.json. + Dict suitable for merging into .claude/settings.json. """ return { + "permissions": { + "allow": [ + "mcp__code-review-graph__*", + ], + }, "hooks": { "PostToolUse": [ { @@ -332,16 +338,26 @@ def generate_hooks_config() -> dict[str, Any]: "command": "code-review-graph detect-changes --brief", "timeout": 10000, }, + { + "matcher": "Grep|Glob|Bash|Read", + "hooks": [ + { + "type": "command", + "command": "code-review-graph enrich", + "timeout": 5000, + } + ], + }, ], } } def install_hooks(repo_root: Path) -> None: - """Write hooks config to .claude/settings.json. + """Write hooks and permissions config to .claude/settings.json. - Merges with existing settings if present, preserving non-hook - configuration. + Merges with existing settings, preserving user's own permission + rules and non-hook configuration. Args: repo_root: Repository root directory. 
@@ -357,11 +373,21 @@ def install_hooks(repo_root: Path) -> None: except (json.JSONDecodeError, OSError) as exc: logger.warning("Could not read existing %s: %s", settings_path, exc) - hooks_config = generate_hooks_config() - existing.update(hooks_config) + config = generate_hooks_config() + + # Deep-merge permissions.allow (don't clobber user's existing rules) + if "permissions" in config: + existing_perms = existing.setdefault("permissions", {}) + existing_allow = existing_perms.setdefault("allow", []) + for rule in config["permissions"]["allow"]: + if rule not in existing_allow: + existing_allow.append(rule) + del config["permissions"] + + existing.update(config) settings_path.write_text(json.dumps(existing, indent=2) + "\n") - logger.info("Wrote hooks config: %s", settings_path) + logger.info("Wrote settings config: %s", settings_path) _CLAUDE_MD_SECTION_MARKER = "" @@ -369,41 +395,44 @@ def install_hooks(repo_root: Path) -> None: _CLAUDE_MD_SECTION = f"""{_CLAUDE_MD_SECTION_MARKER} ## MCP Tools: code-review-graph -**IMPORTANT: This project has a knowledge graph. ALWAYS use the -code-review-graph MCP tools BEFORE using Grep/Glob/Read to explore -the codebase.** The graph is faster, cheaper (fewer tokens), and gives -you structural context (callers, dependents, test coverage) that file -scanning cannot. +This project has a structural knowledge graph that auto-updates on file changes. +Routine Grep/Glob/Read results are automatically enriched with callers, callees, +flows, and test coverage (via hooks -- no action needed). 
-### When to use graph tools FIRST +Use these tools for **deep analysis** that enrichment doesn't cover: + +| Tool | Use when | +|------|----------| +| `detect_changes` | Reviewing code changes -- risk-scored analysis | +| `get_review_context` | Token-efficient source snippets for review | +| `get_impact_radius` | Understanding blast radius of a change | +| `get_affected_flows` | Finding which execution paths are impacted | +| `get_architecture_overview` | High-level codebase structure | +| `refactor_tool` | Planning renames, finding dead code | +""" -- **Exploring code**: `semantic_search_nodes` or `query_graph` instead of Grep -- **Understanding impact**: `get_impact_radius` instead of manually tracing imports -- **Code review**: `detect_changes` + `get_review_context` instead of reading entire files -- **Finding relationships**: `query_graph` with callers_of/callees_of/imports_of/tests_for -- **Architecture questions**: `get_architecture_overview` + `list_communities` +_PLATFORM_SECTION_MARKER = "" -Fall back to Grep/Glob/Read **only** when the graph doesn't cover what you need. +_PLATFORM_SECTION = f"""{_PLATFORM_SECTION_MARKER} +## MCP Tools: code-review-graph -### Key Tools +This project has a structural knowledge graph. Prefer these MCP tools over +Grep/Glob/Read for code exploration -- they give you structural context +(callers, dependents, test coverage) that file scanning cannot. 
| Tool | Use when | |------|----------| -| `detect_changes` | Reviewing code changes — gives risk-scored analysis | -| `get_review_context` | Need source snippets for review — token-efficient | +| `semantic_search_nodes` | Finding functions/classes by name or keyword | +| `query_graph` | Tracing callers, callees, imports, tests, dependencies | +| `detect_changes` | Reviewing code changes -- risk-scored analysis | +| `get_review_context` | Token-efficient source snippets for review | | `get_impact_radius` | Understanding blast radius of a change | | `get_affected_flows` | Finding which execution paths are impacted | -| `query_graph` | Tracing callers, callees, imports, tests, dependencies | -| `semantic_search_nodes` | Finding functions/classes by name or keyword | -| `get_architecture_overview` | Understanding high-level codebase structure | +| `get_architecture_overview` | High-level codebase structure | | `refactor_tool` | Planning renames, finding dead code | -### Workflow - -1. The graph auto-updates on file changes (via hooks). -2. Use `detect_changes` for code review. -3. Use `get_affected_flows` to understand impact. -4. Use `query_graph` pattern=\"tests_for\" to check coverage. +The graph auto-updates. Use `detect_changes` for code review, +`get_affected_flows` for impact, `query_graph` pattern="tests_for" for coverage. """ @@ -447,17 +476,17 @@ def inject_claude_md(repo_root: Path) -> None: def inject_platform_instructions(repo_root: Path) -> list[str]: - """Inject 'use graph first' instructions into all platform rule files. + """Inject 'prefer graph tools' instructions into non-hook platform files. Generates AGENTS.md, GEMINI.md, .cursorrules, and .windsurfrules - with instructions to prefer code-review-graph MCP tools over - manual file scanning. + with stronger instructions since these platforms lack PreToolUse + hooks for passive enrichment. Returns list of files that were created or updated. 
""" updated: list[str] = [] for label, filename in _PLATFORM_INSTRUCTION_FILES.items(): path = repo_root / filename - if _inject_instructions(path, _CLAUDE_MD_SECTION_MARKER, _CLAUDE_MD_SECTION): + if _inject_instructions(path, _PLATFORM_SECTION_MARKER, _PLATFORM_SECTION): updated.append(label) return updated diff --git a/docs/COMMANDS.md b/docs/COMMANDS.md index 7caae26..1b84d9f 100644 --- a/docs/COMMANDS.md +++ b/docs/COMMANDS.md @@ -242,6 +242,9 @@ code-review-graph detect-changes # Risk-scored change analysis code-review-graph detect-changes --base HEAD~3 # Custom base ref code-review-graph detect-changes --brief # Compact output +# Enrichment (PreToolUse hook) +code-review-graph enrich # Enrich search results with graph context + # Wiki code-review-graph wiki # Generate markdown wiki from communities diff --git a/docs/FEATURES.md b/docs/FEATURES.md index d3cb4aa..1933ca2 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -1,9 +1,18 @@ # Features -## v2.1.0 (Current) +## v2.2.0 (Current) +- **PreToolUse search enrichment**: Claude Code hook enriches Grep/Glob/Bash/Read results with callers, callees, execution flows, community membership, and test coverage. Zero-friction adoption -- agents get structural context passively. +- **Multi-word FTS5 AND search**: Queries use AND logic so "graph store" finds GraphStore. +- **Deduplicated query results**: `callers_of`/`callees_of`/`inheritors_of` no longer return duplicates. +- **Ambiguous query auto-resolution**: Bare-name queries prefer non-test candidates. +- **Test function deprioritization**: 0.5x score penalty in search results. +- **Database migrations v1-v6**: Composite edge index for faster queries. +- **589 tests** across 23 test files. + +## v2.1.0 - **22 MCP tools** (up from 9): 13 new tools for flows, communities, architecture, refactoring, wiki, multi-repo, and risk-scored change detection. 
- **5 MCP prompts**: `review_changes`, `architecture_map`, `debug_issue`, `onboard_developer`, `pre_merge_check` workflow templates. -- **18 languages** (up from 15): Added Dart, R, Perl support. +- **19 languages** (up from 15): Added Dart, R, and Perl support, plus Lua in v2.1.0. - **Execution flows**: Trace call chains from entry points (HTTP handlers, CLI commands, tests), sorted by criticality score. - **Community detection**: Cluster related code entities via Leiden algorithm (igraph) or file-based grouping. - **Architecture overview**: Auto-generated architecture map with module summaries and cross-community coupling warnings. @@ -12,7 +21,7 @@ - **Wiki generation**: Auto-generate markdown wiki pages for each community with optional LLM summaries (ollama). - **Multi-repo registry**: Register multiple repositories, search across all of them with `cross_repo_search`. - **Full-text search**: FTS5 virtual table with porter stemming for hybrid keyword + vector search. -- **Database migrations**: Versioned schema migrations (v1-v5) with automatic upgrade on startup. +- **Database migrations**: Versioned schema migrations (v1-v6) with automatic upgrade on startup. - **Optional dependency groups**: `[embeddings]`, `[google-embeddings]`, `[communities]`, `[eval]`, `[wiki]`, `[all]`. - **Evaluation framework**: Benchmark suite with matplotlib visualization. - **TypeScript path resolution**: tsconfig.json paths/baseUrl alias resolution for imports. diff --git a/docs/LLM-OPTIMIZED-REFERENCE.md b/docs/LLM-OPTIMIZED-REFERENCE.md index f3b4890..91e8a36 100644 --- a/docs/LLM-OPTIMIZED-REFERENCE.md +++ b/docs/LLM-OPTIMIZED-REFERENCE.md @@ -1,4 +1,4 @@ -# LLM-OPTIMIZED REFERENCE -- code-review-graph v2.1.0 +# LLM-OPTIMIZED REFERENCE -- code-review-graph v2.2.0 Claude Code: Read ONLY the exact `
` you need. Never load the whole file. @@ -27,7 +27,7 @@ Never include full files unless explicitly asked. MCP tools (22): build_or_update_graph_tool, get_impact_radius_tool, query_graph_tool, get_review_context_tool, semantic_search_nodes_tool, embed_graph_tool, list_graph_stats_tool, get_docs_section_tool, find_large_functions_tool, list_flows_tool, get_flow_tool, get_affected_flows_tool, list_communities_tool, get_community_tool, get_architecture_overview_tool, detect_changes_tool, refactor_tool, apply_refactor_tool, generate_wiki_tool, get_wiki_page_tool, list_repos_tool, cross_repo_search_tool MCP prompts (5): review_changes, architecture_map, debug_issue, onboard_developer, pre_merge_check Skills: build-graph, review-delta, review-pr -CLI: code-review-graph [install|init|build|update|status|watch|visualize|serve|wiki|detect-changes|register|unregister|repos|eval] +CLI: code-review-graph [install|init|build|update|status|watch|visualize|serve|wiki|detect-changes|enrich|register|unregister|repos|eval]
@@ -37,6 +37,8 @@ MIT license. 100% local. No telemetry. DB file: .code-review-graph/graph.db
Run: code-review-graph watch (auto-updates graph on file save via watchdog) Or use PostToolUse (Write|Edit|Bash) hooks for automatic background updates. +PreToolUse hooks with `if: "Bash(git commit*)"` run detect-changes before commits. +PreToolUse hooks on Grep|Glob|Bash|Read run `code-review-graph enrich` to inject callers, callees, flows, and community context into search results.
@@ -48,7 +50,7 @@ Configure via CRG_EMBEDDING_MODEL env var or model parameter.
-Supported (18): Python, TypeScript/TSX, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++, Dart, R, Perl +Supported (19): Python, TypeScript/TSX, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++, Dart, R, Perl, Lua Parser: Tree-sitter via tree-sitter-language-pack
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index b020b98..0698526 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -2,6 +2,13 @@ ## Shipped +### v2.2.0 +- PreToolUse search enrichment (`code-review-graph enrich`) +- Multi-word FTS5 AND search, deduplicated query results, ambiguous auto-resolution +- Test function deprioritization in search +- Composite edge index (v6 migration) +- 589 tests across 23 test files + ### v2.0.0 - 22 MCP tools (up from 9) and 5 MCP prompts - 18 languages (added Dart, R, Perl) @@ -13,7 +20,7 @@ - Wiki generation from community structure - Multi-repo registry with cross-repo search - FTS5 full-text search with porter stemming -- Database migrations (v1-v5) +- Database migrations (v1-v6) - Evaluation framework with matplotlib visualization - TypeScript tsconfig path alias resolution - MiniMax embedding provider (embo-01) diff --git a/docs/USAGE.md b/docs/USAGE.md index 6b8f30f..0da2846 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -95,6 +95,17 @@ code-review-graph register /path/to/other/repo --alias mylib ``` Then use `cross_repo_search_tool` to search across all registered repositories. +### 11. Enrich agent search results (v2.2) + +When installed via `code-review-graph install`, a PreToolUse hook automatically enriches Grep/Glob/Bash(rg/grep)/Read results with graph context: + +- **Callers and callees** of matched symbols +- **Execution flows** the symbol participates in +- **Community membership** (which module/area) +- **Test coverage** (which tests cover the symbol) + +This is zero-friction -- agents get structural context passively alongside every search without needing to explicitly call graph tools. 
+ ## Token Savings | Scenario | Without graph | With graph | diff --git a/tests/test_enrich.py b/tests/test_enrich.py new file mode 100644 index 0000000..862f20c --- /dev/null +++ b/tests/test_enrich.py @@ -0,0 +1,237 @@ +"""Tests for the PreToolUse search enrichment module.""" + +import tempfile +from pathlib import Path + +from code_review_graph.enrich import ( + enrich_file_read, + enrich_search, + extract_pattern, +) +from code_review_graph.graph import GraphStore +from code_review_graph.parser import EdgeInfo, NodeInfo +from code_review_graph.search import rebuild_fts_index + + +class TestExtractPattern: + def test_grep_pattern(self): + assert extract_pattern("Grep", {"pattern": "parse_file"}) == "parse_file" + + def test_grep_empty(self): + assert extract_pattern("Grep", {}) is None + + def test_glob_meaningful_name(self): + assert extract_pattern("Glob", {"pattern": "**/auth*.ts"}) == "auth" + + def test_glob_pure_extension(self): + assert extract_pattern("Glob", {"pattern": "**/*.ts"}) is None + + def test_glob_short_name(self): + # "ab" is only 2 chars, below minimum regex match of 3 + assert extract_pattern("Glob", {"pattern": "**/ab.ts"}) is None + + def test_bash_rg_pattern(self): + result = extract_pattern("Bash", {"command": "rg parse_file src/"}) + assert result == "parse_file" + + def test_bash_grep_pattern(self): + result = extract_pattern("Bash", {"command": "grep -r 'GraphStore' ."}) + assert result == "GraphStore" + + def test_bash_rg_with_flags(self): + result = extract_pattern("Bash", {"command": "rg -t py -i parse_file"}) + assert result == "parse_file" + + def test_bash_non_grep_command(self): + assert extract_pattern("Bash", {"command": "ls -la"}) is None + + def test_bash_short_pattern(self): + # Pattern "ab" is only 2 chars + assert extract_pattern("Bash", {"command": "rg ab src/"}) is None + + def test_unknown_tool(self): + assert extract_pattern("Write", {"content": "hello"}) is None + + def test_bash_rg_with_glob_flag(self): + result = 
extract_pattern( + "Bash", {"command": "rg --glob '*.py' parse_file"} + ) + assert result == "parse_file" + + +class TestEnrichSearch: + def setup_method(self): + self.tmpdir = tempfile.mkdtemp() + self.db_dir = Path(self.tmpdir) / ".code-review-graph" + self.db_dir.mkdir() + self.db_path = self.db_dir / "graph.db" + self.store = GraphStore(self.db_path) + self._seed_data() + + def teardown_method(self): + self.store.close() + + def _seed_data(self): + nodes = [ + NodeInfo( + kind="Function", name="parse_file", file_path=f"{self.tmpdir}/parser.py", + line_start=10, line_end=50, language="python", + params="(path: str)", return_type="list[Node]", + ), + NodeInfo( + kind="Function", name="full_build", file_path=f"{self.tmpdir}/build.py", + line_start=1, line_end=30, language="python", + ), + NodeInfo( + kind="Test", name="test_parse_file", + file_path=f"{self.tmpdir}/test_parser.py", + line_start=1, line_end=20, language="python", + is_test=True, + ), + ] + for n in nodes: + self.store.upsert_node(n) + edges = [ + EdgeInfo( + kind="CALLS", + source=f"{self.tmpdir}/build.py::full_build", + target=f"{self.tmpdir}/parser.py::parse_file", + file_path=f"{self.tmpdir}/build.py", line=15, + ), + EdgeInfo( + kind="TESTED_BY", + source=f"{self.tmpdir}/test_parser.py::test_parse_file", + target=f"{self.tmpdir}/parser.py::parse_file", + file_path=f"{self.tmpdir}/test_parser.py", line=1, + ), + ] + for e in edges: + self.store.upsert_edge(e) + rebuild_fts_index(self.store) + + def test_returns_matching_symbols(self): + result = enrich_search("parse_file", self.tmpdir) + assert "[code-review-graph]" in result + assert "parse_file" in result + + def test_includes_callers(self): + result = enrich_search("parse_file", self.tmpdir) + assert "Called by:" in result + assert "full_build" in result + + def test_includes_tests(self): + result = enrich_search("parse_file", self.tmpdir) + assert "Tests:" in result + assert "test_parse_file" in result + + def test_excludes_test_nodes(self): 
+ result = enrich_search("test_parse", self.tmpdir) + # test nodes should be filtered out of results + assert "test_parse_file" not in result or "symbol(s)" in result + + def test_empty_for_no_match(self): + result = enrich_search("nonexistent_function_xyz", self.tmpdir) + assert result == "" + + def test_empty_for_missing_db(self): + result = enrich_search("parse_file", "/tmp/nonexistent_repo_xyz") + assert result == "" + + +class TestEnrichFileRead: + def setup_method(self): + self.tmpdir = tempfile.mkdtemp() + self.db_dir = Path(self.tmpdir) / ".code-review-graph" + self.db_dir.mkdir() + self.db_path = self.db_dir / "graph.db" + self.store = GraphStore(self.db_path) + self._seed_data() + + def teardown_method(self): + self.store.close() + + def _seed_data(self): + self.file_path = f"{self.tmpdir}/parser.py" + nodes = [ + NodeInfo( + kind="File", name="parser.py", file_path=self.file_path, + line_start=1, line_end=100, language="python", + ), + NodeInfo( + kind="Function", name="parse_file", file_path=self.file_path, + line_start=10, line_end=50, language="python", + ), + NodeInfo( + kind="Function", name="parse_imports", file_path=self.file_path, + line_start=55, line_end=80, language="python", + ), + ] + for n in nodes: + self.store.upsert_node(n) + edges = [ + EdgeInfo( + kind="CALLS", + source=f"{self.file_path}::parse_file", + target=f"{self.file_path}::parse_imports", + file_path=self.file_path, line=30, + ), + ] + for e in edges: + self.store.upsert_edge(e) + self.store._conn.commit() + + def test_returns_file_symbols(self): + result = enrich_file_read(self.file_path, self.tmpdir) + assert "[code-review-graph]" in result + assert "parse_file" in result + assert "parse_imports" in result + + def test_excludes_file_nodes(self): + result = enrich_file_read(self.file_path, self.tmpdir) + # File node "parser.py" should not appear as a symbol entry + lines = result.split("\n") + symbol_lines = [ + ln for ln in lines + if ln and not ln.startswith(" ") and not 
ln.startswith("[") + ] + for line in symbol_lines: + assert "parser.py (" not in line or "parse_" in line + + def test_includes_callees(self): + result = enrich_file_read(self.file_path, self.tmpdir) + assert "Calls:" in result + assert "parse_imports" in result + + def test_empty_for_unknown_file(self): + result = enrich_file_read("/nonexistent/file.py", self.tmpdir) + assert result == "" + + def test_empty_for_missing_db(self): + result = enrich_file_read(self.file_path, "/tmp/nonexistent_repo_xyz") + assert result == "" + + +class TestRunHookOutput: + """Test the JSON output format of run_hook via enrich_search.""" + + def test_hook_json_format(self): + """Verify the hookSpecificOutput structure is correct.""" + # We test the format indirectly by checking enrich_search output + # since run_hook reads from stdin which is harder to test + tmpdir = tempfile.mkdtemp() + db_dir = Path(tmpdir) / ".code-review-graph" + db_dir.mkdir() + store = GraphStore(db_dir / "graph.db") + store.upsert_node( + NodeInfo( + kind="Function", name="my_function", + file_path=f"{tmpdir}/mod.py", + line_start=1, line_end=10, language="python", + ), + ) + rebuild_fts_index(store) + store.close() + + result = enrich_search("my_function", tmpdir) + assert result.startswith("[code-review-graph]") + assert "my_function" in result diff --git a/tests/test_skills.py b/tests/test_skills.py index 09e4fae..e21f311 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -93,6 +93,12 @@ def test_has_all_three_hook_types(self): hook_types = set(config["hooks"].keys()) assert hook_types == {"PostToolUse", "SessionStart", "PreCommit"} + def test_has_permissions_allow(self): + config = generate_hooks_config() + assert "permissions" in config + assert "allow" in config["permissions"] + assert "mcp__code-review-graph__*" in config["permissions"]["allow"] + class TestInstallHooks: def test_creates_settings_file(self, tmp_path): @@ -120,6 +126,33 @@ def test_creates_claude_directory(self, tmp_path): 
install_hooks(tmp_path) assert (tmp_path / ".claude").is_dir() + def test_merges_permissions_with_existing(self, tmp_path): + settings_dir = tmp_path / ".claude" + settings_dir.mkdir(parents=True) + existing = { + "permissions": { + "allow": ["Bash(npm run *)"], + }, + } + (settings_dir / "settings.json").write_text(json.dumps(existing)) + + install_hooks(tmp_path) + + data = json.loads((settings_dir / "settings.json").read_text()) + allow = data["permissions"]["allow"] + assert "Bash(npm run *)" in allow + assert "mcp__code-review-graph__*" in allow + + def test_no_duplicate_permissions(self, tmp_path): + install_hooks(tmp_path) + install_hooks(tmp_path) + + data = json.loads( + (tmp_path / ".claude" / "settings.json").read_text() + ) + allow = data["permissions"]["allow"] + assert allow.count("mcp__code-review-graph__*") == 1 + class TestInjectClaudeMd: def test_creates_section_in_new_file(self, tmp_path):