This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 979547f91d9 Add cross-references, overlapping PR detection, and search to TUI (#64579)
979547f91d9 is described below

commit 979547f91d90472ef9efb2b177966384eb670525
Author: André Ahlert <[email protected]>
AuthorDate: Wed Apr 1 15:55:17 2026 -0300

    Add cross-references, overlapping PR detection, and search to TUI (#64579)
    
    * Add cross-references, overlapping PR detection, and search to TUI
    
    - Extract #N references from PR bodies and display them in the
      detail panel as clickable links.
    - Parse file paths from diffs to detect open PRs that touch the
      same files. Overlapping PRs are shown in the detail panel with
      shared file names.
    - Fetch all diffs during the loading phase so overlaps are ready
      before the TUI opens.
    - Add "/" key to jump to a PR by number, title, or author name.
    
    Signed-off-by: André Ahlert <[email protected]>
    
    * Fix mypy type narrowing error for Future variable reuse
    
    Signed-off-by: André Ahlert <[email protected]>
    
    ---------
    
    Signed-off-by: André Ahlert <[email protected]>
---
 .../src/airflow_breeze/commands/pr_commands.py     | 104 ++++++++++++++++-----
 dev/breeze/src/airflow_breeze/utils/pr_context.py  |  85 +++++++++++++++++
 dev/breeze/src/airflow_breeze/utils/tui_display.py | 102 +++++++++++++++++++-
 dev/breeze/tests/test_pr_context.py                | 103 ++++++++++++++++++++
 4 files changed, 369 insertions(+), 25 deletions(-)

diff --git a/dev/breeze/src/airflow_breeze/commands/pr_commands.py 
b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
index 01948228906..104f4e9b062 100644
--- a/dev/breeze/src/airflow_breeze/commands/pr_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
@@ -4036,8 +4036,8 @@ def _run_tui_triage(
         ("sort", "Sort entries"),
         ("fill_pages", "Fill initial pages"),
         ("init_tui", "Initialize TUI"),
-        ("fetch_diff", "Fetch first diff"),
-        ("prefetch", "Prefetch diffs"),
+        ("fetch_diffs", "Fetch diffs"),
+        ("build_overlaps", "Build overlaps"),
     ]
     _tui_tasks: dict[str, TaskID] = {}
     for key, desc in _tui_steps:
@@ -4086,6 +4086,10 @@ def _run_tui_triage(
             # LLM candidates — show as passing with "waiting for LLM" status
             cat = PRCategory.PASSING
         entry = PRListEntry(pr, cat)
+        # Populate cross-references from PR body
+        from airflow_breeze.utils.pr_context import extract_cross_references
+
+        entry.cross_refs = extract_cross_references(pr.body, 
exclude_number=pr.number) or None
         # Restore cached action for already-triaged PRs
         if cat == PRCategory.ALREADY_TRIAGED and viewer_login and pr.head_sha:
             _cached_cls, cached_act = _get_cached_classification(
@@ -4509,8 +4513,13 @@ def _run_tui_triage(
             _fetch_pr_diff, ctx.token, ctx.github_repository, pr_number
         )
 
+    # File paths index: PR number -> file paths (built from diffs as they 
arrive)
+    _pr_file_paths: dict[int, list[str]] = {}
+
     def _collect_diff_results(tui_ref: TriageTUI) -> None:
         """Move completed diff futures into the cache and update the TUI if 
relevant."""
+        from airflow_breeze.utils.pr_context import 
extract_file_paths_from_diff, find_overlapping_prs
+
         done = [num for num, fut in diff_pending.items() if fut.done()]
         for num in done:
             fut = diff_pending.pop(num)
@@ -4520,6 +4529,23 @@ def _run_tui_triage(
                 result = None
             if result:
                 diff_cache[num] = result
+                # Extract file paths and store for overlap detection
+                paths = extract_file_paths_from_diff(result)
+                if paths:
+                    _pr_file_paths[num] = paths
+                    entry_map = {e.pr.number: e for e in tui_ref.entries}
+                    if num in entry_map:
+                        entry_map[num].file_paths = paths
+                    # Recalculate overlaps for this PR and all PRs that share 
files with it
+                    affected = {num} | {
+                        pr_num
+                        for pr_num, pr_paths in _pr_file_paths.items()
+                        if pr_num != num and set(paths) & set(pr_paths)
+                    }
+                    for pr_num in affected:
+                        if pr_num in entry_map and pr_num in _pr_file_paths:
+                            overlaps = 
find_overlapping_prs(_pr_file_paths[pr_num], pr_num, _pr_file_paths)
+                            entry_map[pr_num].overlapping_prs = overlaps or 
None
             else:
                 # Look up the PR URL for the error message
                 pr_entry = pr_map.get(num)
@@ -4539,27 +4565,59 @@ def _run_tui_triage(
 
     _tui_step_done("init_tui", "ready")
 
-    # Prefetch diff for first PR (blocking, since user sees it immediately)
+    # Fetch all diffs (blocking) so overlaps are ready before the TUI opens
     if entries:
-        first_pr = entries[0].pr
-        _tui_step_start("fetch_diff", f"PR #{first_pr.number}")
-        diff_text = _fetch_pr_diff(ctx.token, ctx.github_repository, 
first_pr.number)
-        if diff_text:
-            diff_cache[first_pr.number] = diff_text
-            tui.set_diff(first_pr.number, diff_text)
-        else:
-            fallback = f"Could not fetch diff. Review at: {first_pr.url}/files"
-            diff_cache[first_pr.number] = fallback
-            tui.set_diff(first_pr.number, fallback)
-        _tui_step_done("fetch_diff", "loaded")
-        # Prefetch diffs for next few PRs in background
-        _tui_step_start("prefetch")
-        for prefetch_entry in entries[1:4]:
-            _submit_diff_fetch(prefetch_entry.pr.number, prefetch_entry.pr.url)
-        _tui_step_done("prefetch", f"{min(3, len(entries) - 1)} queued")
+        from airflow_breeze.utils.pr_context import 
extract_file_paths_from_diff, find_overlapping_prs
+
+        _tui_step_start("fetch_diffs", f"0/{len(entries)}")
+        for e in entries:
+            _submit_diff_fetch(e.pr.number, e.pr.url)
+        fetched = 0
+        while diff_pending:
+            done = [num for num, fut in diff_pending.items() if fut.done()]
+            for num in done:
+                fut = diff_pending.pop(num)
+                try:
+                    result = fut.result()
+                except Exception:
+                    result = None
+                if result:
+                    diff_cache[num] = result
+                    paths = extract_file_paths_from_diff(result)
+                    if paths:
+                        _pr_file_paths[num] = paths
+                        entry_map = {e.pr.number: e for e in entries}
+                        if num in entry_map:
+                            entry_map[num].file_paths = paths
+                else:
+                    pr_entry = pr_map.get(num)
+                    fallback_url = pr_entry.url if pr_entry else ""
+                    diff_cache[num] = f"Could not fetch diff. Review at: 
{fallback_url}/files"
+                fetched += 1
+                _tui_step_start("fetch_diffs", f"{fetched}/{len(entries)}")
+            if diff_pending:
+                import time as _time_mod
+
+                _time_mod.sleep(0.1)
+        # Set diff for first PR in TUI
+        if entries[0].pr.number in diff_cache:
+            tui.set_diff(entries[0].pr.number, 
diff_cache[entries[0].pr.number])
+        _tui_step_done("fetch_diffs", f"{fetched} loaded")
+
+        # Build overlaps now that all file paths are known
+        _tui_step_start("build_overlaps")
+        entry_map = {e.pr.number: e for e in entries}
+        overlap_count = 0
+        for pr_num, paths in _pr_file_paths.items():
+            if pr_num in entry_map:
+                overlaps = find_overlapping_prs(paths, pr_num, _pr_file_paths)
+                entry_map[pr_num].overlapping_prs = overlaps or None
+                if overlaps:
+                    overlap_count += 1
+        _tui_step_done("build_overlaps", f"{overlap_count} PRs with overlaps")
     else:
-        _tui_step_done("fetch_diff", "no PRs")
-        _tui_step_done("prefetch", "skipped")
+        _tui_step_done("fetch_diffs", "no PRs")
+        _tui_step_done("build_overlaps", "skipped")
 
     _tui_progress.stop()
 
@@ -4607,8 +4665,8 @@ def _run_tui_triage(
         for entry in entries:
             if entry.llm_status in ("in_progress", "pending"):
                 n = entry.pr.number
-                fut = _llm_pr_to_future.get(n)
-                if fut is not None and not fut.done():
+                llm_fut = _llm_pr_to_future.get(n)
+                if llm_fut is not None and not llm_fut.done():
                     _undone_entries.append(entry)
                 elif n in _llm_completed_pr_nums and n not in 
_llm_result_pr_nums:
                     entry.llm_status = "error"
diff --git a/dev/breeze/src/airflow_breeze/utils/pr_context.py 
b/dev/breeze/src/airflow_breeze/utils/pr_context.py
new file mode 100644
index 00000000000..e056cd30f5b
--- /dev/null
+++ b/dev/breeze/src/airflow_breeze/utils/pr_context.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Cross-references and overlapping PR detection for auto-triage."""
+
+from __future__ import annotations
+
+import re
+
+
+def extract_file_paths_from_diff(diff_text: str) -> list[str]:
+    """Extract file paths from a unified diff.
+
+    Parses ``diff --git a/path b/path`` headers and returns
+    deduplicated file paths in order of appearance.
+    """
+    paths: list[str] = []
+    seen: set[str] = set()
+    for match in re.finditer(r"^diff --git a/(.+?) b/", diff_text, 
re.MULTILINE):
+        path = match.group(1)
+        if path not in seen:
+            paths.append(path)
+            seen.add(path)
+    return paths
+
+
+def extract_cross_references(body: str, exclude_number: int = 0) -> list[int]:
+    """Extract issue/PR numbers referenced as #N in the PR body.
+
+    Returns deduplicated numbers in order of appearance, excluding
+    the PR's own number.
+    """
+    refs: list[int] = []
+    seen: set[int] = set()
+    for match in re.finditer(r"(?<!\w)#(\d+)(?!\d)", body):
+        num = int(match.group(1))
+        if num != exclude_number and num not in seen:
+            refs.append(num)
+            seen.add(num)
+    return refs
+
+
+def find_overlapping_prs(
+    target_files: list[str],
+    target_number: int,
+    other_prs: dict[int, list[str]],
+) -> dict[int, list[str]]:
+    """Find open PRs that touch the same files as the target PR.
+
+    Args:
+        target_files: File paths changed by the target PR.
+        target_number: PR number of the target (excluded from results).
+        other_prs: Mapping of PR number -> list of file paths for other open 
PRs.
+
+    Returns:
+        Dict of PR number -> list of overlapping file paths, sorted by
+        overlap count descending.
+    """
+    if not target_files:
+        return {}
+
+    target_set = set(target_files)
+    overlaps: dict[int, list[str]] = {}
+
+    for pr_num, pr_files in other_prs.items():
+        if pr_num == target_number:
+            continue
+        common = target_set & set(pr_files)
+        if common:
+            overlaps[pr_num] = sorted(common)
+
+    return dict(sorted(overlaps.items(), key=lambda x: -len(x[1])))
diff --git a/dev/breeze/src/airflow_breeze/utils/tui_display.py 
b/dev/breeze/src/airflow_breeze/utils/tui_display.py
index 4eaaadaa5fd..93cb229b05a 100644
--- a/dev/breeze/src/airflow_breeze/utils/tui_display.py
+++ b/dev/breeze/src/airflow_breeze/utils/tui_display.py
@@ -115,6 +115,7 @@ class TUIAction(Enum):
     ACTION_FLAG = "flag"
     ACTION_LLM = "llm"
     ACTION_AUTHOR_INFO = "author_info"
+    SEARCH = "search"
 
 
 class _FocusPanel(Enum):
@@ -328,6 +329,8 @@ def _read_tui_key(*, timeout: float | None = None) -> 
TUIAction | MouseEvent | s
         return TUIAction.ACTION_LLM
     if ch == "i":
         return TUIAction.ACTION_AUTHOR_INFO
+    if ch == "/":
+        return TUIAction.SEARCH
     # Ctrl-C
     if ch == "\x03":
         return TUIAction.QUIT
@@ -355,6 +358,12 @@ class PRListEntry:
         self.llm_attempts: int = 0  # number of LLM attempts (including 
retries)
         # Author scoring (populated when author profile is fetched)
         self.author_scoring: dict | None = None
+        # File paths changed by this PR (populated from diff)
+        self.file_paths: list[str] | None = None
+        # Cross-references found in PR body
+        self.cross_refs: list[int] | None = None
+        # Overlapping PRs (PR number -> list of shared files)
+        self.overlapping_prs: dict[int, list[str]] | None = None
 
 
 class TriageTUI:
@@ -914,6 +923,28 @@ class TriageTUI:
                 label_text += f" (+{len(pr.labels) - 8} more)"
             lines.append(f"Labels: {label_text}")
 
+        # Cross-references
+        if entry.cross_refs:
+            lines.append("")
+            ref_links = [
+                f"[link=https://github.com/{self.github_repository}/issues/{n}]#{n}[/link]"
+                for n in entry.cross_refs[:10]
+            ]
+            lines.append(f"References: {', '.join(ref_links)}")
+
+        # Overlapping PRs (other open PRs touching the same files)
+        if entry.overlapping_prs:
+            lines.append("")
+            lines.append(f"[yellow]Overlapping PRs 
({len(entry.overlapping_prs)}):[/]")
+            for opr_num, shared_files in 
list(entry.overlapping_prs.items())[:5]:
+                opr_link = (
+                    f"[link=https://github.com/{self.github_repository}/pull/{opr_num}]#{opr_num}[/link]"
+                )
+                files_text = ", ".join(f.rsplit("/", 1)[-1] for f in 
shared_files[:3])
+                if len(shared_files) > 3:
+                    files_text += f" +{len(shared_files) - 3} more"
+                lines.append(f"  {opr_link}: {files_text}")
+
         # Assessment / flagging details (summary and violations)
         assessment = self._assessments.get(pr.number)
         if assessment:
@@ -1258,14 +1289,14 @@ class TriageTUI:
         if self._focus in (_FocusPanel.DIFF, _FocusPanel.DETAIL):
             nav_lines = [
                 "[bold]j/↓[/] Down  [bold]k/↑[/] Up  [bold]PgDn[/] Page",
-                f"[bold]Tab[/] → {next_panel}  [bold]🖱[/] Scroll",
+                f"[bold]Tab[/] → {next_panel}  [bold]/[/] Search  [bold]🖱[/] 
Scroll",
                 "[bold]Esc/q[/] Quit",
             ]
         else:
             nav_lines = [
                 "[bold]j/↓[/] Down  [bold]k/↑[/] Up  [bold]q[/] Quit",
                 f"[bold]n[/] Next pg  [bold]p[/] Prev pg  [bold]Tab[/] → 
{next_panel}",
-                "[bold]🖱[/] Click row / Scroll panels",
+                "[bold]/[/] Search  [bold]🖱[/] Click row / Scroll panels",
             ]
         nav_text = "\n".join(nav_lines)
         nav_panel = Panel(nav_text, title="Nav", border_style="dim", 
padding=(0, 1))
@@ -1696,6 +1727,63 @@ class TriageTUI:
         sys.stdout.write("\033[?25h")  # restore cursor
         sys.stdout.flush()
 
+    def search_jump(self) -> bool:
+        """Show a search prompt at the bottom of the screen.
+
+        The user types a PR number or text. Pressing Enter jumps to the first
+        matching entry. Pressing Escape cancels. Returns True if the cursor 
moved.
+        """
+        width, height = _get_terminal_size()
+        prompt = "/ Jump to PR #: "
+        query = ""
+
+        while True:
+            # Draw prompt on the last row
+            display = f"{prompt}{query}_"
+            sys.stdout.write(f"\033[{height};1H\033[2K{display}")
+            sys.stdout.flush()
+
+            ch = _read_raw_input(timeout=None)
+            if ch is None or ch == "\x1b":
+                # Escape or timeout — cancel
+                break
+            if ch in ("\r", "\n"):
+                # Enter — search
+                break
+            if ch in ("\x7f", "\x08"):
+                # Backspace
+                query = query[:-1]
+                continue
+            if ch == "\x03":
+                # Ctrl-C — cancel
+                break
+            if len(ch) == 1 and ch.isprintable():
+                query += ch
+
+        # Clear the prompt line
+        sys.stdout.write(f"\033[{height};1H\033[2K")
+        sys.stdout.flush()
+
+        if not query:
+            return False
+
+        # Match by PR number only
+        try:
+            target_num = int(query.lstrip("#"))
+        except ValueError:
+            return False
+
+        for idx, entry in enumerate(self.entries):
+            if entry.pr.number == target_num:
+                self.cursor = idx
+                # Put the matched entry at the top of the visible list
+                self.scroll_offset = idx
+                # Switch focus to PR list so the selection is highlighted
+                self._focus = _FocusPanel.PR_LIST
+                return True
+
+        return False
+
     def run_interactive(
         self, *, timeout: float | None = None
     ) -> tuple[PRListEntry | None, TUIAction | str | None]:
@@ -1772,6 +1860,10 @@ class TriageTUI:
                 if entry and not entry.action_taken and key in 
self.get_available_actions(entry):
                     return entry, key
                 return None, key
+            if key == TUIAction.SEARCH:
+                if self.search_jump():
+                    return None, TUIAction.UP
+                return None, key
             # Ignore other keys in diff focus
             return None, key
 
@@ -1821,6 +1913,8 @@ class TriageTUI:
                 if entry and not entry.action_taken and key in 
self.get_available_actions(entry):
                     return entry, key
                 return None, key
+            if key == TUIAction.SEARCH:
+                return None, key
             # Ignore other keys in detail focus
             return None, key
 
@@ -1880,5 +1974,9 @@ class TriageTUI:
             if entry and not entry.action_taken and key in 
self.get_available_actions(entry):
                 return entry, key
             return None, key
+        if key == TUIAction.SEARCH:
+            if self.search_jump():
+                return None, TUIAction.UP  # signal cursor moved
+            return None, key
         # Unknown key — return it for caller to handle
         return self.get_selected_entry(), key
diff --git a/dev/breeze/tests/test_pr_context.py 
b/dev/breeze/tests/test_pr_context.py
new file mode 100644
index 00000000000..cd1a282adeb
--- /dev/null
+++ b/dev/breeze/tests/test_pr_context.py
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from airflow_breeze.utils.pr_context import (
+    extract_cross_references,
+    extract_file_paths_from_diff,
+    find_overlapping_prs,
+)
+
+
+class TestExtractFilePathsFromDiff:
+    def test_basic_diff(self):
+        diff = (
+            "diff --git a/src/foo.py b/src/foo.py\n"
+            "index abc..def 100644\n"
+            "--- a/src/foo.py\n"
+            "+++ b/src/foo.py\n"
+            "@@ -1,3 +1,4 @@\n"
+            " line1\n"
+            "+line2\n"
+            "diff --git a/src/bar.py b/src/bar.py\n"
+            "index ghi..jkl 100644\n"
+        )
+        assert extract_file_paths_from_diff(diff) == ["src/foo.py", 
"src/bar.py"]
+
+    def test_deduplicates(self):
+        diff = "diff --git a/x.py b/x.py\ndiff --git a/x.py b/x.py\n"
+        assert extract_file_paths_from_diff(diff) == ["x.py"]
+
+    def test_empty_diff(self):
+        assert extract_file_paths_from_diff("") == []
+
+    def test_rename(self):
+        diff = "diff --git a/old/path.py b/new/path.py\n"
+        assert extract_file_paths_from_diff(diff) == ["old/path.py"]
+
+
+class TestExtractCrossReferences:
+    def test_basic_refs(self):
+        body = "This fixes #123 and relates to #456."
+        assert extract_cross_references(body) == [123, 456]
+
+    def test_excludes_self(self):
+        body = "See #100 and #200"
+        assert extract_cross_references(body, exclude_number=100) == [200]
+
+    def test_deduplicates(self):
+        body = "Fixes #123. Also see #123 again."
+        assert extract_cross_references(body) == [123]
+
+    def test_no_refs(self):
+        assert extract_cross_references("Just a plain description.") == []
+
+    def test_ignores_anchors(self):
+        body = "See [link](#section) and #42"
+        # #section is not a number so only #42 matches
+        assert extract_cross_references(body) == [42]
+
+    def test_ignores_mid_word(self):
+        body = "color#123 should not match but #456 should"
+        assert extract_cross_references(body) == [456]
+
+
+class TestFindOverlappingPrs:
+    def test_basic_overlap(self):
+        target_files = ["src/foo.py", "src/bar.py"]
+        other_prs = {
+            200: ["src/foo.py", "src/baz.py"],
+            300: ["src/qux.py"],
+            400: ["src/bar.py", "src/foo.py"],
+        }
+        result = find_overlapping_prs(target_files, 100, other_prs)
+        assert 200 in result
+        assert 400 in result
+        assert 300 not in result
+        # PR 400 has 2 overlapping files, should come first
+        assert next(iter(result.keys())) == 400
+
+    def test_excludes_self(self):
+        result = find_overlapping_prs(["a.py"], 100, {100: ["a.py"]})
+        assert result == {}
+
+    def test_empty_files(self):
+        assert find_overlapping_prs([], 100, {200: ["a.py"]}) == {}
+
+    def test_no_overlap(self):
+        result = find_overlapping_prs(["a.py"], 100, {200: ["b.py"]})
+        assert result == {}

Reply via email to