This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 979547f91d9 Add cross-references, overlapping PR detection, and search
to TUI (#64579)
979547f91d9 is described below
commit 979547f91d90472ef9efb2b177966384eb670525
Author: André Ahlert <[email protected]>
AuthorDate: Wed Apr 1 15:55:17 2026 -0300
Add cross-references, overlapping PR detection, and search to TUI (#64579)
* Add cross-references, overlapping PR detection, and search to TUI
- Extract #N references from PR bodies and display them in the
detail panel as clickable links.
- Parse file paths from diffs to detect open PRs that touch the
same files. Overlapping PRs are shown in the detail panel with
shared file names.
- Fetch all diffs during the loading phase so overlaps are ready
before the TUI opens.
- Add "/" key to jump to a PR by number (the prompt added in this change
matches PR numbers only; matching by title or author name is not implemented).
Signed-off-by: André Ahlert <[email protected]>
* Fix mypy type narrowing error for Future variable reuse
Signed-off-by: André Ahlert <[email protected]>
---------
Signed-off-by: André Ahlert <[email protected]>
---
.../src/airflow_breeze/commands/pr_commands.py | 104 ++++++++++++++++-----
dev/breeze/src/airflow_breeze/utils/pr_context.py | 85 +++++++++++++++++
dev/breeze/src/airflow_breeze/utils/tui_display.py | 102 +++++++++++++++++++-
dev/breeze/tests/test_pr_context.py | 103 ++++++++++++++++++++
4 files changed, 369 insertions(+), 25 deletions(-)
diff --git a/dev/breeze/src/airflow_breeze/commands/pr_commands.py
b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
index 01948228906..104f4e9b062 100644
--- a/dev/breeze/src/airflow_breeze/commands/pr_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
@@ -4036,8 +4036,8 @@ def _run_tui_triage(
("sort", "Sort entries"),
("fill_pages", "Fill initial pages"),
("init_tui", "Initialize TUI"),
- ("fetch_diff", "Fetch first diff"),
- ("prefetch", "Prefetch diffs"),
+ ("fetch_diffs", "Fetch diffs"),
+ ("build_overlaps", "Build overlaps"),
]
_tui_tasks: dict[str, TaskID] = {}
for key, desc in _tui_steps:
@@ -4086,6 +4086,10 @@ def _run_tui_triage(
# LLM candidates — show as passing with "waiting for LLM" status
cat = PRCategory.PASSING
entry = PRListEntry(pr, cat)
+ # Populate cross-references from PR body
+ from airflow_breeze.utils.pr_context import extract_cross_references
+
+ entry.cross_refs = extract_cross_references(pr.body,
exclude_number=pr.number) or None
# Restore cached action for already-triaged PRs
if cat == PRCategory.ALREADY_TRIAGED and viewer_login and pr.head_sha:
_cached_cls, cached_act = _get_cached_classification(
@@ -4509,8 +4513,13 @@ def _run_tui_triage(
_fetch_pr_diff, ctx.token, ctx.github_repository, pr_number
)
+ # File paths index: PR number -> file paths (built from diffs as they
arrive)
+ _pr_file_paths: dict[int, list[str]] = {}
+
def _collect_diff_results(tui_ref: TriageTUI) -> None:
"""Move completed diff futures into the cache and update the TUI if
relevant."""
+ from airflow_breeze.utils.pr_context import
extract_file_paths_from_diff, find_overlapping_prs
+
done = [num for num, fut in diff_pending.items() if fut.done()]
for num in done:
fut = diff_pending.pop(num)
@@ -4520,6 +4529,23 @@ def _run_tui_triage(
result = None
if result:
diff_cache[num] = result
+ # Extract file paths and store for overlap detection
+ paths = extract_file_paths_from_diff(result)
+ if paths:
+ _pr_file_paths[num] = paths
+ entry_map = {e.pr.number: e for e in tui_ref.entries}
+ if num in entry_map:
+ entry_map[num].file_paths = paths
+ # Recalculate overlaps for this PR and all PRs that share
files with it
+ affected = {num} | {
+ pr_num
+ for pr_num, pr_paths in _pr_file_paths.items()
+ if pr_num != num and set(paths) & set(pr_paths)
+ }
+ for pr_num in affected:
+ if pr_num in entry_map and pr_num in _pr_file_paths:
+ overlaps =
find_overlapping_prs(_pr_file_paths[pr_num], pr_num, _pr_file_paths)
+ entry_map[pr_num].overlapping_prs = overlaps or
None
else:
# Look up the PR URL for the error message
pr_entry = pr_map.get(num)
@@ -4539,27 +4565,59 @@ def _run_tui_triage(
_tui_step_done("init_tui", "ready")
- # Prefetch diff for first PR (blocking, since user sees it immediately)
+ # Fetch all diffs (blocking) so overlaps are ready before the TUI opens
if entries:
- first_pr = entries[0].pr
- _tui_step_start("fetch_diff", f"PR #{first_pr.number}")
- diff_text = _fetch_pr_diff(ctx.token, ctx.github_repository,
first_pr.number)
- if diff_text:
- diff_cache[first_pr.number] = diff_text
- tui.set_diff(first_pr.number, diff_text)
- else:
- fallback = f"Could not fetch diff. Review at: {first_pr.url}/files"
- diff_cache[first_pr.number] = fallback
- tui.set_diff(first_pr.number, fallback)
- _tui_step_done("fetch_diff", "loaded")
- # Prefetch diffs for next few PRs in background
- _tui_step_start("prefetch")
- for prefetch_entry in entries[1:4]:
- _submit_diff_fetch(prefetch_entry.pr.number, prefetch_entry.pr.url)
- _tui_step_done("prefetch", f"{min(3, len(entries) - 1)} queued")
+ from airflow_breeze.utils.pr_context import
extract_file_paths_from_diff, find_overlapping_prs
+
+ _tui_step_start("fetch_diffs", f"0/{len(entries)}")
+ for e in entries:
+ _submit_diff_fetch(e.pr.number, e.pr.url)
+ fetched = 0
+ while diff_pending:
+ done = [num for num, fut in diff_pending.items() if fut.done()]
+ for num in done:
+ fut = diff_pending.pop(num)
+ try:
+ result = fut.result()
+ except Exception:
+ result = None
+ if result:
+ diff_cache[num] = result
+ paths = extract_file_paths_from_diff(result)
+ if paths:
+ _pr_file_paths[num] = paths
+ entry_map = {e.pr.number: e for e in entries}
+ if num in entry_map:
+ entry_map[num].file_paths = paths
+ else:
+ pr_entry = pr_map.get(num)
+ fallback_url = pr_entry.url if pr_entry else ""
+ diff_cache[num] = f"Could not fetch diff. Review at:
{fallback_url}/files"
+ fetched += 1
+ _tui_step_start("fetch_diffs", f"{fetched}/{len(entries)}")
+ if diff_pending:
+ import time as _time_mod
+
+ _time_mod.sleep(0.1)
+ # Set diff for first PR in TUI
+ if entries[0].pr.number in diff_cache:
+ tui.set_diff(entries[0].pr.number,
diff_cache[entries[0].pr.number])
+ _tui_step_done("fetch_diffs", f"{fetched} loaded")
+
+ # Build overlaps now that all file paths are known
+ _tui_step_start("build_overlaps")
+ entry_map = {e.pr.number: e for e in entries}
+ overlap_count = 0
+ for pr_num, paths in _pr_file_paths.items():
+ if pr_num in entry_map:
+ overlaps = find_overlapping_prs(paths, pr_num, _pr_file_paths)
+ entry_map[pr_num].overlapping_prs = overlaps or None
+ if overlaps:
+ overlap_count += 1
+ _tui_step_done("build_overlaps", f"{overlap_count} PRs with overlaps")
else:
- _tui_step_done("fetch_diff", "no PRs")
- _tui_step_done("prefetch", "skipped")
+ _tui_step_done("fetch_diffs", "no PRs")
+ _tui_step_done("build_overlaps", "skipped")
_tui_progress.stop()
@@ -4607,8 +4665,8 @@ def _run_tui_triage(
for entry in entries:
if entry.llm_status in ("in_progress", "pending"):
n = entry.pr.number
- fut = _llm_pr_to_future.get(n)
- if fut is not None and not fut.done():
+ llm_fut = _llm_pr_to_future.get(n)
+ if llm_fut is not None and not llm_fut.done():
_undone_entries.append(entry)
elif n in _llm_completed_pr_nums and n not in
_llm_result_pr_nums:
entry.llm_status = "error"
diff --git a/dev/breeze/src/airflow_breeze/utils/pr_context.py
b/dev/breeze/src/airflow_breeze/utils/pr_context.py
new file mode 100644
index 00000000000..e056cd30f5b
--- /dev/null
+++ b/dev/breeze/src/airflow_breeze/utils/pr_context.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Cross-references and overlapping PR detection for auto-triage."""
+
+from __future__ import annotations
+
+import re
+
+
+def extract_file_paths_from_diff(diff_text: str) -> list[str]:
+    """Return the files named in the ``diff --git`` headers of a unified diff.
+
+    Each header of the form ``diff --git a/<path> b/<path>`` contributes the
+    ``a/`` side of the pair, so a rename is reported under its old path.
+    Every path appears once, in order of first appearance.
+    """
+    headers = re.finditer(r"^diff --git a/(.+?) b/", diff_text, re.MULTILINE)
+    # dict keys keep insertion order, which gives an ordered de-duplication.
+    return list(dict.fromkeys(header.group(1) for header in headers))
+
+
+def extract_cross_references(body: str | None, exclude_number: int = 0) -> list[int]:
+    """Extract issue/PR numbers referenced as ``#N`` in a PR body.
+
+    Args:
+        body: Text of the PR description. ``None`` or an empty string yields
+            an empty list instead of raising.
+        exclude_number: Number to leave out of the result (the PR's own
+            number, so a PR does not reference itself).
+
+    Returns:
+        Referenced numbers, deduplicated, in order of first appearance.
+    """
+    if not body:
+        # ``re.finditer`` raises TypeError on None; a missing body simply has no references.
+        return []
+    # The lookbehind rejects a "#" glued to a word character (e.g. "color#123").
+    numbers = (int(match.group(1)) for match in re.finditer(r"(?<!\w)#(\d+)(?!\d)", body))
+    # dict.fromkeys de-duplicates while preserving order of appearance.
+    return [num for num in dict.fromkeys(numbers) if num != exclude_number]
+
+
+def find_overlapping_prs(
+    target_files: list[str],
+    target_number: int,
+    other_prs: dict[int, list[str]],
+) -> dict[int, list[str]]:
+    """Report which other PRs change at least one file that the target PR changes.
+
+    Args:
+        target_files: File paths changed by the target PR.
+        target_number: Number of the target PR; its own entry in ``other_prs`` is skipped.
+        other_prs: Mapping of PR number -> file paths changed by that PR.
+
+    Returns:
+        Mapping of PR number -> sorted list of shared paths, ordered from the
+        largest overlap to the smallest. Empty when nothing overlaps or when
+        ``target_files`` is empty.
+    """
+    if not target_files:
+        return {}
+
+    wanted = frozenset(target_files)
+    shared: dict[int, list[str]] = {}
+    for number, files in other_prs.items():
+        if number != target_number and (common := wanted.intersection(files)):
+            shared[number] = sorted(common)
+
+    # Negating the length sorts descending while leaving equal-sized overlaps in input order.
+    ranked = sorted(shared.items(), key=lambda item: -len(item[1]))
+    return dict(ranked)
diff --git a/dev/breeze/src/airflow_breeze/utils/tui_display.py
b/dev/breeze/src/airflow_breeze/utils/tui_display.py
index 4eaaadaa5fd..93cb229b05a 100644
--- a/dev/breeze/src/airflow_breeze/utils/tui_display.py
+++ b/dev/breeze/src/airflow_breeze/utils/tui_display.py
@@ -115,6 +115,7 @@ class TUIAction(Enum):
ACTION_FLAG = "flag"
ACTION_LLM = "llm"
ACTION_AUTHOR_INFO = "author_info"
+ SEARCH = "search"
class _FocusPanel(Enum):
@@ -328,6 +329,8 @@ def _read_tui_key(*, timeout: float | None = None) ->
TUIAction | MouseEvent | s
return TUIAction.ACTION_LLM
if ch == "i":
return TUIAction.ACTION_AUTHOR_INFO
+ if ch == "/":
+ return TUIAction.SEARCH
# Ctrl-C
if ch == "\x03":
return TUIAction.QUIT
@@ -355,6 +358,12 @@ class PRListEntry:
self.llm_attempts: int = 0 # number of LLM attempts (including
retries)
# Author scoring (populated when author profile is fetched)
self.author_scoring: dict | None = None
+ # File paths changed by this PR (populated from diff)
+ self.file_paths: list[str] | None = None
+ # Cross-references found in PR body
+ self.cross_refs: list[int] | None = None
+ # Overlapping PRs (PR number -> list of shared files)
+ self.overlapping_prs: dict[int, list[str]] | None = None
class TriageTUI:
@@ -914,6 +923,28 @@ class TriageTUI:
label_text += f" (+{len(pr.labels) - 8} more)"
lines.append(f"Labels: {label_text}")
+ # Cross-references
+ if entry.cross_refs:
+ lines.append("")
+ ref_links = [
+
f"[link=https://github.com/{self.github_repository}/issues/{n}]#{n}[/link]"
+ for n in entry.cross_refs[:10]
+ ]
+ lines.append(f"References: {', '.join(ref_links)}")
+
+ # Overlapping PRs (other open PRs touching the same files)
+ if entry.overlapping_prs:
+ lines.append("")
+ lines.append(f"[yellow]Overlapping PRs
({len(entry.overlapping_prs)}):[/]")
+ for opr_num, shared_files in
list(entry.overlapping_prs.items())[:5]:
+ opr_link = (
+
f"[link=https://github.com/{self.github_repository}/pull/{opr_num}]#{opr_num}[/link]"
+ )
+ files_text = ", ".join(f.rsplit("/", 1)[-1] for f in
shared_files[:3])
+ if len(shared_files) > 3:
+ files_text += f" +{len(shared_files) - 3} more"
+ lines.append(f" {opr_link}: {files_text}")
+
# Assessment / flagging details (summary and violations)
assessment = self._assessments.get(pr.number)
if assessment:
@@ -1258,14 +1289,14 @@ class TriageTUI:
if self._focus in (_FocusPanel.DIFF, _FocusPanel.DETAIL):
nav_lines = [
"[bold]j/↓[/] Down [bold]k/↑[/] Up [bold]PgDn[/] Page",
- f"[bold]Tab[/] → {next_panel} [bold]🖱[/] Scroll",
+ f"[bold]Tab[/] → {next_panel} [bold]/[/] Search [bold]🖱[/]
Scroll",
"[bold]Esc/q[/] Quit",
]
else:
nav_lines = [
"[bold]j/↓[/] Down [bold]k/↑[/] Up [bold]q[/] Quit",
f"[bold]n[/] Next pg [bold]p[/] Prev pg [bold]Tab[/] →
{next_panel}",
- "[bold]🖱[/] Click row / Scroll panels",
+ "[bold]/[/] Search [bold]🖱[/] Click row / Scroll panels",
]
nav_text = "\n".join(nav_lines)
nav_panel = Panel(nav_text, title="Nav", border_style="dim",
padding=(0, 1))
@@ -1696,6 +1727,63 @@ class TriageTUI:
sys.stdout.write("\033[?25h") # restore cursor
sys.stdout.flush()
+    def search_jump(self) -> bool:
+        """Prompt for a PR number on the last terminal row and jump to that PR.
+
+        Typed characters are echoed after the prompt. Enter runs the lookup;
+        Escape, Ctrl-C, or no input from the reader cancels and discards
+        whatever was typed. Only PR numbers are matched, with an optional
+        leading ``#``.
+
+        Returns:
+            True if the cursor was moved to the entry with that PR number.
+            False if the prompt was cancelled, the query was empty or not a
+            number, or no entry has that PR number.
+        """
+        _, height = _get_terminal_size()
+        prompt = "/ Jump to PR #: "
+        query = ""
+        confirmed = False
+
+        while True:
+            # Redraw the prompt on the last row, with a trailing underscore after the typed text
+            sys.stdout.write(f"\033[{height};1H\033[2K{prompt}{query}_")
+            sys.stdout.flush()
+
+            ch = _read_raw_input(timeout=None)
+            if ch is None or ch in ("\x1b", "\x03"):
+                # Escape, Ctrl-C, or nothing read — cancel
+                break
+            if ch in ("\r", "\n"):
+                # Enter — search
+                confirmed = True
+                break
+            if ch in ("\x7f", "\x08"):
+                # Backspace
+                query = query[:-1]
+                continue
+            if len(ch) == 1 and ch.isprintable():
+                query += ch
+
+        # Clear the prompt line
+        sys.stdout.write(f"\033[{height};1H\033[2K")
+        sys.stdout.flush()
+
+        # A cancelled prompt must not act on a partially typed query.
+        if not confirmed or not query:
+            return False
+
+        # Match by PR number only
+        try:
+            target_num = int(query.lstrip("#"))
+        except ValueError:
+            return False
+
+        for idx, entry in enumerate(self.entries):
+            if entry.pr.number == target_num:
+                self.cursor = idx
+                # Put the matched entry at the top of the visible list
+                self.scroll_offset = idx
+                # Switch focus to PR list so the selection is highlighted
+                self._focus = _FocusPanel.PR_LIST
+                return True
+
+        return False
+
def run_interactive(
self, *, timeout: float | None = None
) -> tuple[PRListEntry | None, TUIAction | str | None]:
@@ -1772,6 +1860,10 @@ class TriageTUI:
if entry and not entry.action_taken and key in
self.get_available_actions(entry):
return entry, key
return None, key
+ if key == TUIAction.SEARCH:
+ if self.search_jump():
+ return None, TUIAction.UP
+ return None, key
# Ignore other keys in diff focus
return None, key
@@ -1821,6 +1913,8 @@ class TriageTUI:
if entry and not entry.action_taken and key in
self.get_available_actions(entry):
return entry, key
return None, key
+ if key == TUIAction.SEARCH:
+ return None, key
# Ignore other keys in detail focus
return None, key
@@ -1880,5 +1974,9 @@ class TriageTUI:
if entry and not entry.action_taken and key in
self.get_available_actions(entry):
return entry, key
return None, key
+ if key == TUIAction.SEARCH:
+ if self.search_jump():
+ return None, TUIAction.UP # signal cursor moved
+ return None, key
# Unknown key — return it for caller to handle
return self.get_selected_entry(), key
diff --git a/dev/breeze/tests/test_pr_context.py
b/dev/breeze/tests/test_pr_context.py
new file mode 100644
index 00000000000..cd1a282adeb
--- /dev/null
+++ b/dev/breeze/tests/test_pr_context.py
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from airflow_breeze.utils.pr_context import (
+ extract_cross_references,
+ extract_file_paths_from_diff,
+ find_overlapping_prs,
+)
+
+
+class TestExtractFilePathsFromDiff:
+    """Behaviour of ``extract_file_paths_from_diff`` on small hand-written diffs."""
+
+    def test_basic_diff(self):
+        """Each ``diff --git`` header contributes one path, in diff order."""
+        two_file_diff = (
+            "diff --git a/src/foo.py b/src/foo.py\n"
+            "index abc..def 100644\n"
+            "--- a/src/foo.py\n"
+            "+++ b/src/foo.py\n"
+            "@@ -1,3 +1,4 @@\n"
+            " line1\n"
+            "+line2\n"
+            "diff --git a/src/bar.py b/src/bar.py\n"
+            "index ghi..jkl 100644\n"
+        )
+        found = extract_file_paths_from_diff(two_file_diff)
+        assert found == ["src/foo.py", "src/bar.py"]
+
+    def test_deduplicates(self):
+        """A path whose header appears twice is reported once."""
+        repeated = "diff --git a/x.py b/x.py\ndiff --git a/x.py b/x.py\n"
+        found = extract_file_paths_from_diff(repeated)
+        assert found == ["x.py"]
+
+    def test_empty_diff(self):
+        """An empty diff has no paths."""
+        found = extract_file_paths_from_diff("")
+        assert found == []
+
+    def test_rename(self):
+        """A rename header is reported under its old (``a/``) path."""
+        renamed = "diff --git a/old/path.py b/new/path.py\n"
+        found = extract_file_paths_from_diff(renamed)
+        assert found == ["old/path.py"]
+
+
+class TestExtractCrossReferences:
+    """Behaviour of ``extract_cross_references`` on PR body text."""
+
+    def test_basic_refs(self):
+        """References come back in the order they appear."""
+        found = extract_cross_references("This fixes #123 and relates to #456.")
+        assert found == [123, 456]
+
+    def test_excludes_self(self):
+        """The number passed as ``exclude_number`` is dropped."""
+        found = extract_cross_references("See #100 and #200", exclude_number=100)
+        assert found == [200]
+
+    def test_deduplicates(self):
+        """A number mentioned twice is reported once."""
+        found = extract_cross_references("Fixes #123. Also see #123 again.")
+        assert found == [123]
+
+    def test_no_refs(self):
+        """Text without ``#N`` yields an empty list."""
+        found = extract_cross_references("Just a plain description.")
+        assert found == []
+
+    def test_ignores_anchors(self):
+        """A markdown anchor such as ``#section`` is not a reference."""
+        # #section is not a number so only #42 matches
+        found = extract_cross_references("See [link](#section) and #42")
+        assert found == [42]
+
+    def test_ignores_mid_word(self):
+        """A ``#N`` glued to the end of a word is ignored."""
+        found = extract_cross_references("color#123 should not match but #456 should")
+        assert found == [456]
+
+
+class TestFindOverlappingPrs:
+    """Behaviour of ``find_overlapping_prs`` on small file-path mappings."""
+
+    def test_basic_overlap(self):
+        """PRs sharing files are returned, largest overlap first."""
+        candidates = {
+            200: ["src/foo.py", "src/baz.py"],
+            300: ["src/qux.py"],
+            400: ["src/bar.py", "src/foo.py"],
+        }
+        overlaps = find_overlapping_prs(["src/foo.py", "src/bar.py"], 100, candidates)
+        assert 200 in overlaps
+        assert 400 in overlaps
+        assert 300 not in overlaps
+        # PR 400 has 2 overlapping files, should come first
+        assert next(iter(overlaps)) == 400
+
+    def test_excludes_self(self):
+        """The target PR's own entry is never reported as an overlap."""
+        overlaps = find_overlapping_prs(["a.py"], 100, {100: ["a.py"]})
+        assert overlaps == {}
+
+    def test_empty_files(self):
+        """A target with no files overlaps with nothing."""
+        overlaps = find_overlapping_prs([], 100, {200: ["a.py"]})
+        assert overlaps == {}
+
+    def test_no_overlap(self):
+        """Disjoint file lists produce an empty result."""
+        overlaps = find_overlapping_prs(["a.py"], 100, {200: ["b.py"]})
+        assert overlaps == {}