diff --git a/dev/breeze/src/airflow_breeze/commands/pr_commands.py b/dev/breeze/src/airflow_breeze/commands/pr_commands.py index d25e90257fc54..6ada78cff39e7 100644 --- a/dev/breeze/src/airflow_breeze/commands/pr_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/pr_commands.py @@ -4036,8 +4036,8 @@ def _run_tui_triage( ("sort", "Sort entries"), ("fill_pages", "Fill initial pages"), ("init_tui", "Initialize TUI"), - ("fetch_diff", "Fetch first diff"), - ("prefetch", "Prefetch diffs"), + ("fetch_diffs", "Fetch diffs"), + ("build_overlaps", "Build overlaps"), ] _tui_tasks: dict[str, TaskID] = {} for key, desc in _tui_steps: @@ -4086,6 +4086,10 @@ def _tui_step_done(key: str, status: str = "done") -> None: # LLM candidates — show as passing with "waiting for LLM" status cat = PRCategory.PASSING entry = PRListEntry(pr, cat) + # Populate cross-references from PR body + from airflow_breeze.utils.pr_context import extract_cross_references + + entry.cross_refs = extract_cross_references(pr.body, exclude_number=pr.number) or None # Restore cached action for already-triaged PRs if cat == PRCategory.ALREADY_TRIAGED and viewer_login and pr.head_sha: _cached_cls, cached_act = _get_cached_classification( @@ -4509,8 +4513,13 @@ def _submit_diff_fetch(pr_number: int, pr_url: str) -> None: _fetch_pr_diff, ctx.token, ctx.github_repository, pr_number ) + # File paths index: PR number -> file paths (built from diffs as they arrive) + _pr_file_paths: dict[int, list[str]] = {} + def _collect_diff_results(tui_ref: TriageTUI) -> None: """Move completed diff futures into the cache and update the TUI if relevant.""" + from airflow_breeze.utils.pr_context import extract_file_paths_from_diff, find_overlapping_prs + done = [num for num, fut in diff_pending.items() if fut.done()] for num in done: fut = diff_pending.pop(num) @@ -4520,6 +4529,23 @@ def _collect_diff_results(tui_ref: TriageTUI) -> None: result = None if result: diff_cache[num] = result + # Extract file paths and store for overlap detection + paths = extract_file_paths_from_diff(result) + if paths: + _pr_file_paths[num] = paths + entry_map = {e.pr.number: e for e in tui_ref.entries} + if num in entry_map: + entry_map[num].file_paths = paths + # Recalculate overlaps for this PR and all PRs that share files with it + affected = {num} | { + pr_num + for pr_num, pr_paths in _pr_file_paths.items() + if pr_num != num and set(paths) & set(pr_paths) + } + for pr_num in affected: + if pr_num in entry_map and pr_num in _pr_file_paths: + overlaps = find_overlapping_prs(_pr_file_paths[pr_num], pr_num, _pr_file_paths) + entry_map[pr_num].overlapping_prs = overlaps or None else: # Look up the PR URL for the error message pr_entry = pr_map.get(num) @@ -4539,27 +4565,59 @@ def _ensure_diff_for_pr(tui_ref: TriageTUI, pr_number: int, pr_url: str) -> None _tui_step_done("init_tui", "ready") - # Prefetch diff for first PR (blocking, since user sees it immediately) + # Fetch all diffs (blocking) so overlaps are ready before the TUI opens if entries: - first_pr = entries[0].pr - _tui_step_start("fetch_diff", f"PR #{first_pr.number}") - diff_text = _fetch_pr_diff(ctx.token, ctx.github_repository, first_pr.number) - if diff_text: - diff_cache[first_pr.number] = diff_text - tui.set_diff(first_pr.number, diff_text) - else: - fallback = f"Could not fetch diff. Review at: {first_pr.url}/files" - diff_cache[first_pr.number] = fallback - tui.set_diff(first_pr.number, fallback) - _tui_step_done("fetch_diff", "loaded") - # Prefetch diffs for next few PRs in background - _tui_step_start("prefetch") - for prefetch_entry in entries[1:4]: - _submit_diff_fetch(prefetch_entry.pr.number, prefetch_entry.pr.url) - _tui_step_done("prefetch", f"{min(3, len(entries) - 1)} queued") + from airflow_breeze.utils.pr_context import extract_file_paths_from_diff, find_overlapping_prs + + _tui_step_start("fetch_diffs", f"0/{len(entries)}") + for e in entries: + _submit_diff_fetch(e.pr.number, e.pr.url) + fetched = 0 + while diff_pending: + done = [num for num, fut in diff_pending.items() if fut.done()] + for num in done: + fut = diff_pending.pop(num) + try: + result = fut.result() + except Exception: + result = None + if result: + diff_cache[num] = result + paths = extract_file_paths_from_diff(result) + if paths: + _pr_file_paths[num] = paths + entry_map = {e.pr.number: e for e in entries} + if num in entry_map: + entry_map[num].file_paths = paths + else: + pr_entry = pr_map.get(num) + fallback_url = pr_entry.url if pr_entry else "" + diff_cache[num] = f"Could not fetch diff. Review at: {fallback_url}/files" + fetched += 1 + _tui_step_start("fetch_diffs", f"{fetched}/{len(entries)}") + if diff_pending: + import time as _time_mod + + _time_mod.sleep(0.1) + # Set diff for first PR in TUI + if entries[0].pr.number in diff_cache: + tui.set_diff(entries[0].pr.number, diff_cache[entries[0].pr.number]) + _tui_step_done("fetch_diffs", f"{fetched} loaded") + + # Build overlaps now that all file paths are known + _tui_step_start("build_overlaps") + entry_map = {e.pr.number: e for e in entries} + overlap_count = 0 + for pr_num, paths in _pr_file_paths.items(): + if pr_num in entry_map: + overlaps = find_overlapping_prs(paths, pr_num, _pr_file_paths) + entry_map[pr_num].overlapping_prs = overlaps or None + if overlaps: + overlap_count += 1 + _tui_step_done("build_overlaps", f"{overlap_count} PRs with overlaps") else: - _tui_step_done("fetch_diff", "no PRs") - _tui_step_done("prefetch", "skipped") + _tui_step_done("fetch_diffs", "no PRs") + _tui_step_done("build_overlaps", "skipped") _tui_progress.stop() @@ -4607,8 +4665,8 @@ def _ensure_diff_for_pr(tui_ref: TriageTUI, pr_number: int, pr_url: str) -> None for entry in entries: if entry.llm_status in ("in_progress", "pending"): n = entry.pr.number - fut = _llm_pr_to_future.get(n) - if fut is not None and not fut.done(): + llm_fut = _llm_pr_to_future.get(n) + if llm_fut is not None and not llm_fut.done(): _undone_entries.append(entry) elif n in _llm_completed_pr_nums and n not in _llm_result_pr_nums: entry.llm_status = "error" diff --git a/dev/breeze/src/airflow_breeze/utils/pr_context.py b/dev/breeze/src/airflow_breeze/utils/pr_context.py new file mode 100644 index 0000000000000..e056cd30f5bf7 --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/pr_context.py @@ -0,0 +1,85 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Cross-references and overlapping PR detection for auto-triage.""" + +from __future__ import annotations + +import re + + +def extract_file_paths_from_diff(diff_text: str) -> list[str]: + """Extract file paths from a unified diff. + + Parses ``diff --git a/path b/path`` headers and returns + deduplicated file paths in order of appearance. + """ + paths: list[str] = [] + seen: set[str] = set() + for match in re.finditer(r"^diff --git a/(.+?) b/", diff_text, re.MULTILINE): + path = match.group(1) + if path not in seen: + paths.append(path) + seen.add(path) + return paths + + +def extract_cross_references(body: str, exclude_number: int = 0) -> list[int]: + """Extract issue/PR numbers referenced as #N in the PR body. + + Returns deduplicated numbers in order of appearance, excluding + the PR's own number. + """ + refs: list[int] = [] + seen: set[int] = set() + for match in re.finditer(r"(? dict[int, list[str]]: + """Find open PRs that touch the same files as the target PR. + + Args: + target_files: File paths changed by the target PR. + target_number: PR number of the target (excluded from results). + other_prs: Mapping of PR number -> list of file paths for other open PRs. + + Returns: + Dict of PR number -> list of overlapping file paths, sorted by + overlap count descending. + """ + if not target_files: + return {} + + target_set = set(target_files) + overlaps: dict[int, list[str]] = {} + + for pr_num, pr_files in other_prs.items(): + if pr_num == target_number: + continue + common = target_set & set(pr_files) + if common: + overlaps[pr_num] = sorted(common) + + return dict(sorted(overlaps.items(), key=lambda x: -len(x[1]))) diff --git a/dev/breeze/src/airflow_breeze/utils/tui_display.py b/dev/breeze/src/airflow_breeze/utils/tui_display.py index c4027b786aba7..e891a4684498b 100644 --- a/dev/breeze/src/airflow_breeze/utils/tui_display.py +++ b/dev/breeze/src/airflow_breeze/utils/tui_display.py @@ -115,6 +115,7 @@ class TUIAction(Enum): ACTION_FLAG = "flag" ACTION_LLM = "llm" ACTION_AUTHOR_INFO = "author_info" + SEARCH = "search" class _FocusPanel(Enum): @@ -328,6 +329,8 @@ def _read_tui_key(*, timeout: float | None = None) -> TUIAction | MouseEvent | s return TUIAction.ACTION_LLM if ch == "i": return TUIAction.ACTION_AUTHOR_INFO + if ch == "/": + return TUIAction.SEARCH # Ctrl-C if ch == "\x03": return TUIAction.QUIT @@ -355,6 +358,12 @@ def __init__(self, pr: PRData, category: PRCategory, *, action_taken: str = "", self.llm_attempts: int = 0 # number of LLM attempts (including retries) # Author scoring (populated when author profile is fetched) self.author_scoring: dict | None = None + # File paths changed by this PR (populated from diff) + self.file_paths: list[str] | None = None + # Cross-references found in PR body + self.cross_refs: list[int] | None = None + # Overlapping PRs (PR number -> list of shared files) + self.overlapping_prs: dict[int, list[str]] | None = None class TriageTUI: @@ -916,6 +925,28 @@ def _build_detail_lines(self, entry: PRListEntry) -> list[str]: label_text += f" (+{len(pr.labels) - 8} more)" lines.append(f"Labels: {label_text}") + # Cross-references + if entry.cross_refs: + lines.append("") + ref_links = [ + f"[link=https://github.com/{self.github_repository}/issues/{n}]#{n}[/link]" + for n in entry.cross_refs[:10] + ] + lines.append(f"References: {', '.join(ref_links)}") + + # Overlapping PRs (other open PRs touching the same files) + if entry.overlapping_prs: + lines.append("") + lines.append(f"[yellow]Overlapping PRs ({len(entry.overlapping_prs)}):[/]") + for opr_num, shared_files in list(entry.overlapping_prs.items())[:5]: + opr_link = ( + f"[link=https://github.com/{self.github_repository}/pull/{opr_num}]#{opr_num}[/link]" + ) + files_text = ", ".join(f.rsplit("/", 1)[-1] for f in shared_files[:3]) + if len(shared_files) > 3: + files_text += f" +{len(shared_files) - 3} more" + lines.append(f" {opr_link}: {files_text}") + # Assessment / flagging details (summary and violations) assessment = self._assessments.get(pr.number) if assessment: @@ -1260,14 +1291,14 @@ def _build_footer(self, total_width: int) -> Columns: if self._focus in (_FocusPanel.DIFF, _FocusPanel.DETAIL): nav_lines = [ "[bold]j/↓[/] Down [bold]k/↑[/] Up [bold]PgDn[/] Page", - f"[bold]Tab[/] → {next_panel} [bold]🖱[/] Scroll", + f"[bold]Tab[/] → {next_panel} [bold]/[/] Search [bold]🖱[/] Scroll", "[bold]Esc/q[/] Quit", ] else: nav_lines = [ "[bold]j/↓[/] Down [bold]k/↑[/] Up [bold]q[/] Quit", f"[bold]n[/] Next pg [bold]p[/] Prev pg [bold]Tab[/] → {next_panel}", - "[bold]🖱[/] Click row / Scroll panels", + "[bold]/[/] Search [bold]🖱[/] Click row / Scroll panels", ] nav_text = "\n".join(nav_lines) nav_panel = Panel(nav_text, title="Nav", border_style="dim", padding=(0, 1)) @@ -1698,6 +1729,63 @@ def render_author_overlay(self, profile: dict) -> None: sys.stdout.write("\033[?25h") # restore cursor sys.stdout.flush() + def search_jump(self) -> bool: + """Show a search prompt at the bottom of the screen. + + The user types a PR number or text. Pressing Enter jumps to the first + matching entry. Pressing Escape cancels. Returns True if the cursor moved. + """ + width, height = _get_terminal_size() + prompt = "/ Jump to PR #: " + query = "" + + while True: + # Draw prompt on the last row + display = f"{prompt}{query}_" + sys.stdout.write(f"\033[{height};1H\033[2K{display}") + sys.stdout.flush() + + ch = _read_raw_input(timeout=None) + if ch is None or ch == "\x1b": + # Escape or timeout — cancel + break + if ch in ("\r", "\n"): + # Enter — search + break + if ch in ("\x7f", "\x08"): + # Backspace + query = query[:-1] + continue + if ch == "\x03": + # Ctrl-C — cancel + break + if len(ch) == 1 and ch.isprintable(): + query += ch + + # Clear the prompt line + sys.stdout.write(f"\033[{height};1H\033[2K") + sys.stdout.flush() + + if not query: + return False + + # Match by PR number only + try: + target_num = int(query.lstrip("#")) + except ValueError: + return False + + for idx, entry in enumerate(self.entries): + if entry.pr.number == target_num: + self.cursor = idx + # Put the matched entry at the top of the visible list + self.scroll_offset = idx + # Switch focus to PR list so the selection is highlighted + self._focus = _FocusPanel.PR_LIST + return True + + return False + def run_interactive( self, *, timeout: float | None = None ) -> tuple[PRListEntry | None, TUIAction | str | None]: @@ -1774,6 +1862,10 @@ def run_interactive( if entry and not entry.action_taken and key in self.get_available_actions(entry): return entry, key return None, key + if key == TUIAction.SEARCH: + if self.search_jump(): + return None, TUIAction.UP + return None, key # Ignore other keys in diff focus return None, key @@ -1823,6 +1915,8 @@ def run_interactive( if entry and not entry.action_taken and key in self.get_available_actions(entry): return entry, key return None, key + if key == TUIAction.SEARCH: + return None, key # Ignore other keys in detail focus return None, key @@ -1882,5 +1976,9 @@ def run_interactive( if entry and not entry.action_taken and key in self.get_available_actions(entry): return entry, key return None, key + if key == TUIAction.SEARCH: + if self.search_jump(): + return None, TUIAction.UP # signal cursor moved + return None, key # Unknown key — return it for caller to handle return self.get_selected_entry(), key diff --git a/dev/breeze/tests/test_pr_context.py b/dev/breeze/tests/test_pr_context.py new file mode 100644 index 0000000000000..cd1a282adeb31 --- /dev/null +++ b/dev/breeze/tests/test_pr_context.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from airflow_breeze.utils.pr_context import ( + extract_cross_references, + extract_file_paths_from_diff, + find_overlapping_prs, +) + + +class TestExtractFilePathsFromDiff: + def test_basic_diff(self): + diff = ( + "diff --git a/src/foo.py b/src/foo.py\n" + "index abc..def 100644\n" + "--- a/src/foo.py\n" + "+++ b/src/foo.py\n" + "@@ -1,3 +1,4 @@\n" + " line1\n" + "+line2\n" + "diff --git a/src/bar.py b/src/bar.py\n" + "index ghi..jkl 100644\n" + ) + assert extract_file_paths_from_diff(diff) == ["src/foo.py", "src/bar.py"] + + def test_deduplicates(self): + diff = "diff --git a/x.py b/x.py\ndiff --git a/x.py b/x.py\n" + assert extract_file_paths_from_diff(diff) == ["x.py"] + + def test_empty_diff(self): + assert extract_file_paths_from_diff("") == [] + + def test_rename(self): + diff = "diff --git a/old/path.py b/new/path.py\n" + assert extract_file_paths_from_diff(diff) == ["old/path.py"] + + +class TestExtractCrossReferences: + def test_basic_refs(self): + body = "This fixes #123 and relates to #456." + assert extract_cross_references(body) == [123, 456] + + def test_excludes_self(self): + body = "See #100 and #200" + assert extract_cross_references(body, exclude_number=100) == [200] + + def test_deduplicates(self): + body = "Fixes #123. Also see #123 again." + assert extract_cross_references(body) == [123] + + def test_no_refs(self): + assert extract_cross_references("Just a plain description.") == [] + + def test_ignores_anchors(self): + body = "See [link](#section) and #42" + # #section is not a number so only #42 matches + assert extract_cross_references(body) == [42] + + def test_ignores_mid_word(self): + body = "color#123 should not match but #456 should" + assert extract_cross_references(body) == [456] + + +class TestFindOverlappingPrs: + def test_basic_overlap(self): + target_files = ["src/foo.py", "src/bar.py"] + other_prs = { + 200: ["src/foo.py", "src/baz.py"], + 300: ["src/qux.py"], + 400: ["src/bar.py", "src/foo.py"], + } + result = find_overlapping_prs(target_files, 100, other_prs) + assert 200 in result + assert 400 in result + assert 300 not in result + # PR 400 has 2 overlapping files, should come first + assert next(iter(result.keys())) == 400 + + def test_excludes_self(self): + result = find_overlapping_prs(["a.py"], 100, {100: ["a.py"]}) + assert result == {} + + def test_empty_files(self): + assert find_overlapping_prs([], 100, {200: ["a.py"]}) == {} + + def test_no_overlap(self): + result = find_overlapping_prs(["a.py"], 100, {200: ["b.py"]}) + assert result == {}