This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new dba47277f30 fix: correct airflowignore negation pattern handling for
directory-only patterns (#62860)
dba47277f30 is described below
commit dba47277f309255c7d40b1001578411d5cb29e2b
Author: Yoann <[email protected]>
AuthorDate: Wed Mar 4 06:05:34 2026 -0800
fix: correct airflowignore negation pattern handling for directory-only
patterns (#62860)
Directory-only patterns (ending with /) in .airflowignore were incorrectly
matching files inside the directory, not just the directory itself. This
caused negation patterns like !abc/def/ to un-ignore all files within
abc/def/ instead of only un-ignoring the directory for traversal purposes.
Added a dir_only flag to _GlobIgnoreRule so that directory-only patterns are
skipped when matching files, consistent with the gitignore specification.
Closes: #62716
---
.../module_loading/file_discovery.py | 18 ++++++++--
.../tests/module_loading/test_file_discovery.py | 41 ++++++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git
a/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
b/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
index 8b1536544f8..523dc8d35f1 100644
--- a/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
+++ b/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
@@ -79,6 +79,7 @@ class _GlobIgnoreRule(NamedTuple):
wild_match_pattern: GitWildMatchPattern
relative_to: Path | None = None
+ dir_only: bool = False
@staticmethod
def compile(pattern: str, base_dir: Path, definition_file: Path) ->
_IgnoreRule | None:
@@ -95,8 +96,15 @@ class _GlobIgnoreRule(NamedTuple):
# > Otherwise the pattern may also match at any level below the
.gitignore level.
relative_to = definition_file.parent
+ # See https://git-scm.com/docs/gitignore
+ # > If there is a separator at the end of the pattern then the pattern
will only match
+ # > directories, otherwise the pattern can match both files and
directories.
+ # Strip the negation prefix before checking for trailing separator.
+ raw_pattern = pattern.lstrip("!")
+ dir_only = raw_pattern.rstrip() != raw_pattern.rstrip().rstrip("/")
+
ignore_pattern = GitWildMatchPattern(pattern)
- return _GlobIgnoreRule(wild_match_pattern=ignore_pattern,
relative_to=relative_to)
+ return _GlobIgnoreRule(wild_match_pattern=ignore_pattern,
relative_to=relative_to, dir_only=dir_only)
@staticmethod
def match(path: Path, rules: list[_IgnoreRule]) -> bool:
@@ -105,8 +113,14 @@ class _GlobIgnoreRule(NamedTuple):
for rule in rules:
if not isinstance(rule, _GlobIgnoreRule):
raise ValueError(f"_GlobIgnoreRule cannot match rules of type:
{type(rule)}")
+ # See https://git-scm.com/docs/gitignore
+ # > If there is a separator at the end of the pattern then the
pattern will only match
+ # > directories, otherwise the pattern can match both files and
directories.
+ is_dir = path.is_dir()
+ if rule.dir_only and not is_dir:
+ continue
rel_obj = path.relative_to(rule.relative_to) if rule.relative_to
else Path(path.name)
- if path.is_dir():
+ if is_dir:
rel_path = f"{rel_obj.as_posix()}/"
else:
rel_path = rel_obj.as_posix()
diff --git a/shared/module_loading/tests/module_loading/test_file_discovery.py
b/shared/module_loading/tests/module_loading/test_file_discovery.py
index 0c5347dca1d..a745f47fce6 100644
--- a/shared/module_loading/tests/module_loading/test_file_discovery.py
+++ b/shared/module_loading/tests/module_loading/test_file_discovery.py
@@ -137,3 +137,44 @@ class TestFindPathFromDirectory:
detected.add(p.relative_to(dags_root).as_posix())
assert detected == {"a/b/subfolder/keep.py"}
+
+ def
test_airflowignore_negation_directory_only_patterns_do_not_unignore_files(self,
tmp_path):
+ """Directory-only negation patterns should only unignore directories,
not files inside them.
+
+ Regression test for https://github.com/apache/airflow/issues/62716
+
+ Patterns:
+ * -> ignore everything
+ !abc/ -> unignore abc dir (for traversal), NOT its
contents
+ !abc/def/ -> unignore abc/def dir (for traversal), NOT
its contents
+ !abc/def/xyz/ -> unignore abc/def/xyz dir (for traversal),
NOT its contents
+ !abc/def/xyz/* -> unignore contents of abc/def/xyz
+ """
+ dags_root = tmp_path / "dags"
+ (dags_root / "abc" / "def" / "xyz").mkdir(parents=True)
+
+ # files at various levels – only xyz_dag.py should be discovered
+ (dags_root / "root_dag.py").write_text("raise Exception('ignored')\n")
+ (dags_root / "abc" / "abc_dag.py").write_text("raise
Exception('ignored')\n")
+ (dags_root / "abc" / "def" / "def_dag.py").write_text("raise
Exception('ignored')\n")
+ (dags_root / "abc" / "def" / "xyz" / "xyz_dag.py").write_text("#
should be discovered\n")
+
+ (dags_root / ".airflowignore").write_text(
+ "\n".join(
+ [
+ "*",
+ "!abc/",
+ "!abc/def/",
+ "!abc/def/xyz/",
+ "!abc/def/xyz/*",
+ ]
+ )
+ )
+
+ detected = set()
+ for raw in find_path_from_directory(dags_root, ".airflowignore",
"glob"):
+ p = Path(raw)
+ if p.is_file() and p.suffix == ".py":
+ detected.add(p.relative_to(dags_root).as_posix())
+
+ assert detected == {"abc/def/xyz/xyz_dag.py"}