diff --git a/app/nominal_code/config/env.py b/app/nominal_code/config/env.py index 0283816..5bd8d0f 100644 --- a/app/nominal_code/config/env.py +++ b/app/nominal_code/config/env.py @@ -29,6 +29,7 @@ ("REVIEWER_SYSTEM_PROMPT", ["agent", "reviewer", "system_prompt"]), ("REVIEWER_SYSTEM_PROMPT_FILE", ["agent", "reviewer", "system_prompt_file"]), ("REVIEWER_TRIGGERS", ["reviewer", "triggers"]), + ("REVIEWER_IGNORE_PATTERNS", ["reviewer", "ignore_patterns"]), ("INLINE_SUGGESTIONS", ["reviewer", "inline_suggestions"]), ("AGENT_PROVIDER", ["agent", "reviewer", "provider"]), ("AGENT_MODEL", ["agent", "reviewer", "model"]), @@ -76,6 +77,7 @@ "PR_TITLE_INCLUDE_TAGS", "PR_TITLE_EXCLUDE_TAGS", "REVIEWER_TRIGGERS", + "REVIEWER_IGNORE_PATTERNS", "K8S_ENV_FROM_SECRETS", } ) diff --git a/app/nominal_code/config/loader.py b/app/nominal_code/config/loader.py index 224b979..35c701d 100644 --- a/app/nominal_code/config/loader.py +++ b/app/nominal_code/config/loader.py @@ -223,6 +223,8 @@ def _build_reviewer( if settings.reviewer.inline_suggestions: suggestions_prompt = load_prompt("reviewer_suggestions.md") + ignore_patterns: frozenset[str] = frozenset(settings.reviewer.ignore_patterns) + if require_webhook: if not settings.reviewer.bot_username: raise ValueError("REVIEWER_BOT_USERNAME must be set") @@ -230,11 +232,13 @@ def _build_reviewer( return ReviewerConfig( bot_username=settings.reviewer.bot_username, suggestions_prompt=suggestions_prompt, + ignore_patterns=ignore_patterns, ) return ReviewerConfig( bot_username="", suggestions_prompt=suggestions_prompt, + ignore_patterns=ignore_patterns, ) diff --git a/app/nominal_code/config/models.py b/app/nominal_code/config/models.py index df16bb2..34f3073 100644 --- a/app/nominal_code/config/models.py +++ b/app/nominal_code/config/models.py @@ -63,22 +63,28 @@ class WebhookSettings(BaseModel): class ReviewerSettings(BaseModel): """ - Reviewer bot identity settings. + Settings governing the reviewer bot. - These control how the bot appears on pull requests. Runtime concerns - (LLM provider, model, system prompt, max turns) live under - ``AgentSettings.reviewer``. + Covers the bot's identity, the events that trigger it, and how it + shapes reviews. Runtime concerns (LLM provider, model, system + prompt, max turns) live under ``AgentSettings.reviewer``. Attributes: bot_username (str): The @mention name for the reviewer bot. triggers (list[str]): PR lifecycle events that auto-trigger the reviewer. inline_suggestions (bool): Whether to enable one-click-apply code suggestions in review comments. + ignore_patterns (list[str]): fnmatch shell-glob patterns of file + paths to exclude from the diff before it reaches the reviewer. + Empty list means no filtering. ``*`` matches across path + separators, so ``vendor/**`` matches recursively. + """ bot_username: str | None = None triggers: list[str] = Field(default_factory=list) inline_suggestions: bool = True + ignore_patterns: list[str] = Field(default_factory=list) class AgentRoleSettings(BaseModel): diff --git a/app/nominal_code/config/settings.py b/app/nominal_code/config/settings.py index 6045807..b0d0fbd 100644 --- a/app/nominal_code/config/settings.py +++ b/app/nominal_code/config/settings.py @@ -60,23 +60,30 @@ class GitLabConfig(BaseModel): class ReviewerConfig(BaseModel): """ - Reviewer bot identity configuration. + Resolved reviewer bot configuration. - Holds only bot-identity concerns (user-facing behavior on the PR). - The reviewer's LLM runtime (provider, model, system prompt, max - turns) lives on ``AgentConfig.reviewer``. + Bundles bot identity, output style, and the diff-shaping rules the + reviewer applies when assembling the review context. The reviewer's + LLM runtime (provider, model, system prompt, max turns) lives on + ``AgentConfig.reviewer``; PR lifecycle triggers live on + ``WebhookConfig.routing`` since they govern dispatch, not the review. Attributes: bot_username (str): The @mention name for the reviewer bot. suggestions_prompt (str): Prompt section appended to the reviewer system prompt when non-empty, enabling one-click-apply code suggestions. + ignore_patterns (frozenset[str]): fnmatch shell-glob patterns of + file paths to exclude from the diff before it reaches the + reviewer agent. Empty means no filtering. ``*`` matches across + path separators, so ``vendor/**`` matches recursively. """ model_config = ConfigDict(frozen=True) bot_username: str suggestions_prompt: str = "" + ignore_patterns: frozenset[str] = frozenset() class PromptsConfig(BaseModel): diff --git a/app/nominal_code/review/diff.py b/app/nominal_code/review/diff.py index f145665..d3e15ef 100644 --- a/app/nominal_code/review/diff.py +++ b/app/nominal_code/review/diff.py @@ -1,6 +1,8 @@ from __future__ import annotations +import fnmatch import re +from collections.abc import Iterable from nominal_code.models import ChangedFile, DiffSide, ReviewFinding @@ -132,6 +134,44 @@ def build_diff_index( return index +def filter_changed_files( + changed_files: list[ChangedFile], + ignore_patterns: Iterable[str], +) -> tuple[list[ChangedFile], list[str]]: + """ + Split changed files by ignore-pattern match. + + Walks ``changed_files`` and excludes any whose ``file_path`` matches + at least one fnmatch pattern in ``ignore_patterns``. fnmatch shell-glob + semantics apply: ``*`` matches any characters including ``/``, so + ``vendor/**`` matches recursively. + + Args: + changed_files (list[ChangedFile]): Files in the PR diff. + ignore_patterns (Iterable[str]): Glob patterns to exclude. Empty + means no filtering. + + Returns: + tuple[list[ChangedFile], list[str]]: A pair of (kept, removed-paths). + """ + + patterns: tuple[str, ...] = tuple(ignore_patterns) + + if not patterns: + return changed_files, [] + + kept: list[ChangedFile] = [] + removed: list[str] = [] + + for changed_file in changed_files: + if any(fnmatch.fnmatch(changed_file.file_path, pat) for pat in patterns): + removed.append(changed_file.file_path) + else: + kept.append(changed_file) + + return kept, removed + + def filter_findings( findings: list[ReviewFinding], changed_files: list[ChangedFile], diff --git a/app/nominal_code/review/reviewer.py b/app/nominal_code/review/reviewer.py index 88fecf7..90cd54b 100644 --- a/app/nominal_code/review/reviewer.py +++ b/app/nominal_code/review/reviewer.py @@ -38,6 +38,7 @@ from nominal_code.prompts import load_prompt from nominal_code.review.diff import ( build_effective_summary, + filter_changed_files, filter_findings, ) from nominal_code.review.output import ( @@ -716,7 +717,11 @@ async def _prepare_review_context( return ReviewContext( repo_path=repo_path, deps_path=None, - changed_files=changed_files_result, + changed_files=_apply_ignore_patterns( + changed_files_result, + config=config, + event=event, + ), existing_comments=existing_comments, metadata=metadata_result, ) @@ -753,13 +758,60 @@ async def _prepare_review_context( return ReviewContext( repo_path=workspace.repo_path, deps_path=workspace.deps_path, - changed_files=results[0], + changed_files=_apply_ignore_patterns( + results[0], + config=config, + event=event, + ), existing_comments=existing_comments, metadata=results[2], workspace=workspace, ) +def _apply_ignore_patterns( + changed_files: list[ChangedFile], + *, + config: Config, + event: PullRequestEvent, +) -> list[ChangedFile]: + """ + Apply org-configured ignore patterns to the diff before review. + + Reads ``config.reviewer.ignore_patterns`` and excludes any matching + file paths. When at least one file is excluded, logs a single + WARNING line that mirrors ``filter_findings`` so observability is + consistent across diff filtering and finding filtering. + + Args: + changed_files (list[ChangedFile]): Files in the PR diff. + config (Config): Application configuration. + event (PullRequestEvent): The event being reviewed; used only + for the log line. + + Returns: + list[ChangedFile]: The retained files. Identical to ``changed_files`` + when no patterns are configured. + """ + + patterns: frozenset[str] = ( + config.reviewer.ignore_patterns if config.reviewer else frozenset() + ) + + kept, removed = filter_changed_files(changed_files, patterns) + + if removed: + logger.warning( + "Excluded %d of %d files by ignore_patterns for %s#%d", + len(removed), + len(changed_files), + event.repo_full_name, + event.pr_number, + ) + + return kept + + async def _fetch_pr_comments_or_empty( platform: Platform, event: PullRequestEvent, diff --git a/app/tests/review/test_diff.py b/app/tests/review/test_diff.py index 5f72712..86bba56 100644 --- a/app/tests/review/test_diff.py +++ b/app/tests/review/test_diff.py @@ -9,11 +9,89 @@ annotate_diff, build_diff_index, build_effective_summary, + filter_changed_files, filter_findings, parse_diff_lines, ) +def _changed(file_path): + return ChangedFile( + file_path=file_path, + status=FileStatus.MODIFIED, + patch="@@ -1,1 +1,1 @@\n-a\n+b\n", + ) + + +class TestFilterChangedFiles: + def test_empty_patterns_returns_all_files_unchanged(self): + files = [_changed("src/main.py"), _changed("README.md")] + + kept, removed = filter_changed_files(files, []) + + assert kept == files + assert removed == [] + + def test_single_glob_excludes_matching_extension(self): + files = [_changed("foo.lock"), _changed("src/main.py")] + + kept, removed = filter_changed_files(files, ["*.lock"]) + + assert [changed.file_path for changed in kept] == ["src/main.py"] + assert removed == ["foo.lock"] + + def test_double_star_glob_matches_recursively(self): + files = [ + _changed("vendor/a.py"), + _changed("vendor/sub/b.py"), + _changed("src/c.py"), + ] + + kept, removed = filter_changed_files(files, ["vendor/**"]) + + assert [changed.file_path for changed in kept] == ["src/c.py"] + assert sorted(removed) == ["vendor/a.py", "vendor/sub/b.py"] + + def test_multiple_patterns_match_any(self): + files = [ + _changed("foo.lock"), + _changed("dist/bundle.js"), + _changed("src/main.py"), + ] + + kept, removed = filter_changed_files( + files, + ["*.lock", "dist/**"], + ) + + assert [changed.file_path for changed in kept] == ["src/main.py"] + assert sorted(removed) == ["dist/bundle.js", "foo.lock"] + + def test_all_files_filtered_returns_empty_kept(self): + files = [_changed("foo.lock"), _changed("bar.lock")] + + kept, removed = filter_changed_files(files, ["*.lock"]) + + assert kept == [] + assert sorted(removed) == ["bar.lock", "foo.lock"] + + def test_no_match_returns_all_files(self): + files = [_changed("src/main.py"), _changed("README.md")] + + kept, removed = filter_changed_files(files, ["*.lock"]) + + assert kept == files + assert removed == [] + + def test_pattern_iterable_is_consumed_safely(self): + files = [_changed("foo.lock"), _changed("src/main.py")] + + kept, removed = filter_changed_files(files, iter(["*.lock"])) + + assert [changed.file_path for changed in kept] == ["src/main.py"] + assert removed == ["foo.lock"] + + class TestParseDiffLines: def test_parse_diff_lines_addition_lines_in_right(self): patch_text = "@@ -0,0 +1,3 @@\n+line one\n+line two\n+line three\n" diff --git a/app/tests/review/test_review.py b/app/tests/review/test_review.py index c887093..77e5688 100644 --- a/app/tests/review/test_review.py +++ b/app/tests/review/test_review.py @@ -32,6 +32,7 @@ MAX_EXISTING_COMMENTS, ReviewResult, ReviewScope, + _prepare_review_context, review, run_and_post_review, ) @@ -1697,3 +1698,136 @@ def test_build_codebase_reviewer_prompt_with_user_instructions(self): assert "Focus on error handling" in prompt assert "Additional instructions" in prompt + + +def _make_lifecycle_event(): + from nominal_code.platforms.base import LifecycleEvent + + return LifecycleEvent( + platform=PlatformName.GITHUB, + repo_full_name="owner/repo", + pr_number=42, + pr_branch="feat/x", + event_type=EventType.PR_OPENED, + clone_url="https://token@github.com/owner/repo.git", + base_branch="main", + pr_author="alice", + ) + + +class TestPrepareReviewContextIgnorePatterns: + @pytest.mark.asyncio + async def test_files_matching_patterns_are_excluded(self, tmp_path): + config = _make_config(allowed_users=["alice"]) + config.reviewer = ReviewerConfig( + bot_username="claude-reviewer", + ignore_patterns=frozenset({"*.lock", "vendor/**"}), + ) + platform = _make_platform() + platform.fetch_pr_diff = AsyncMock( + return_value=[ + ChangedFile( + file_path="Cargo.lock", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ChangedFile( + file_path="vendor/foo.go", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ChangedFile( + file_path="src/main.py", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ], + ) + event = _make_lifecycle_event() + + review_context = await _prepare_review_context( + event=event, + config=config, + platform=platform, + workspace_path=str(tmp_path), + bot_username="claude-reviewer", + ) + + kept_paths = [changed.file_path for changed in review_context.changed_files] + assert kept_paths == ["src/main.py"] + + @pytest.mark.asyncio + async def test_empty_patterns_keeps_all_files(self, tmp_path): + config = _make_config(allowed_users=["alice"]) + config.reviewer = ReviewerConfig( + bot_username="claude-reviewer", + ignore_patterns=frozenset(), + ) + platform = _make_platform() + platform.fetch_pr_diff = AsyncMock( + return_value=[ + ChangedFile( + file_path="Cargo.lock", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ChangedFile( + file_path="src/main.py", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ], + ) + event = _make_lifecycle_event() + + review_context = await _prepare_review_context( + event=event, + config=config, + platform=platform, + workspace_path=str(tmp_path), + bot_username="claude-reviewer", + ) + + kept_paths = [changed.file_path for changed in review_context.changed_files] + assert kept_paths == ["Cargo.lock", "src/main.py"] + + @pytest.mark.asyncio + async def test_excluded_files_logged_at_warning(self, tmp_path, caplog): + import logging + + config = _make_config(allowed_users=["alice"]) + config.reviewer = ReviewerConfig( + bot_username="claude-reviewer", + ignore_patterns=frozenset({"*.lock"}), + ) + platform = _make_platform() + platform.fetch_pr_diff = AsyncMock( + return_value=[ + ChangedFile( + file_path="Cargo.lock", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ChangedFile( + file_path="src/main.py", + status=FileStatus.MODIFIED, + patch="@@ -1 +1 @@\n-x\n+y\n", + ), + ], + ) + event = _make_lifecycle_event() + + with caplog.at_level(logging.WARNING, logger="nominal_code.review.reviewer"): + await _prepare_review_context( + event=event, + config=config, + platform=platform, + workspace_path=str(tmp_path), + bot_username="claude-reviewer", + ) + + assert any( + "Excluded 1 of 2 files by ignore_patterns" in record.message + and "owner/repo#42" in record.message + for record in caplog.records + )