This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new f32991b8022 Restrict reproducible-build dirty check to
dockerignore-allowed paths (#67663)
f32991b8022 is described below
commit f32991b8022348f1bc363212c0f09bd3bc479a8b
Author: Jarek Potiuk <[email protected]>
AuthorDate: Thu Jun 4 15:36:25 2026 +0200
Restrict reproducible-build dirty check to dockerignore-allowed paths
(#67663)
Official airflow-core builds use a deny-by-default `.dockerignore` plus
an explicit allowlist (`!path`) and run inside a docker container that
sees only the allowlisted files. Inside the container, `repo.is_dirty()`
flags every tracked-but-not-COPYed file as a deletion (~220 files,
~30k lines), and every clean-tag release ships with a misleading
`git_version = .dev0+<SHA>.dirty` instead of `.release:<SHA>`.
Restrict the dirty check to paths in the dockerignore allowlist
(parsed at build time so it stays in sync) so a clean tag produces a
`.release:<SHA>` git_version. Falls back to the previous global
`is_dirty()` when `.dockerignore` is absent, contains no `!path`
allowlist entries, or the restricted `git diff` call fails.
---
airflow-core/hatch_build.py | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/airflow-core/hatch_build.py b/airflow-core/hatch_build.py
index e199d4b70d3..5971b86e91b 100644
--- a/airflow-core/hatch_build.py
+++ b/airflow-core/hatch_build.py
@@ -71,6 +71,40 @@ class CustomBuild(BuilderInterface[BuilderConfig,
PluginManager]):
dist_path = Path(self.root) / "src" / "airflow" / "ui" / "dist"
return dist_path.resolve().as_posix()
@staticmethod
def _dockerignore_allowed_paths(work_dir: Path) -> list[str] | None:
    """
    Return the ``!path`` allowlist entries of ``work_dir/.dockerignore``.

    The repo's ``.dockerignore`` follows a deny-by-default pattern (``**``)
    plus an explicit allowlist of ``!path`` entries. When the package is
    built inside the docker image (``COPY . /opt/airflow``), only the
    allowlisted files reach the container, so paths outside the allowlist
    are guaranteed to be absent and would otherwise be reported as
    ``deleted`` by ``git diff`` — stamping every release as ``.dirty``.
    The allowlist is parsed at build time so it stays in sync with the file.

    :param work_dir: directory expected to contain ``.dockerignore``.
    :return: allowlisted paths in file order, with the ``!`` marker and any
        surrounding ``/`` removed; ``None`` when the file is missing, cannot
        be read or decoded as UTF-8, or contains no allowlist entries, so
        the caller can fall back to an unrestricted check.
    """
    dockerignore = work_dir / ".dockerignore"
    try:
        # EAFP: one read covers "missing", "is a directory", permission
        # problems and undecodable bytes, none of which should abort a build.
        content = dockerignore.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None

    allowed: list[str] = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        # Blank lines, ``#`` comments and plain deny patterns all fail this
        # single check; only ``!``-prefixed exception lines are of interest.
        if not line.startswith("!"):
            continue
        # Docker matches patterns relative to the context root, so ``/foo``
        # and ``foo`` name the same path; a leading ``/`` handed to git would
        # instead be read as an absolute filesystem path.
        path = line[1:].strip().strip("/")
        if path:
            allowed.append(path)
    return allowed or None
+
@classmethod
def _is_dirty_within_build_context(cls, repo: Any, work_dir: Path) -> bool:
    """
    Tell whether ``repo`` has uncommitted changes inside the docker build context.

    The check is limited to the paths allowlisted in ``work_dir/.dockerignore``
    by diffing those paths against ``HEAD``. When no allowlist is available,
    or the restricted diff raises, the result of ``repo.is_dirty()`` is
    returned instead.

    :param repo: repository object exposing ``is_dirty()`` and ``git.diff()``
        (presumably a GitPython ``Repo`` — confirm against the caller).
    :param work_dir: directory whose ``.dockerignore`` defines the allowlist.
    :return: ``True`` when the restricted diff lists at least one file, or
        when the fallback ``is_dirty()`` reports a dirty tree.
    """
    pathspecs = cls._dockerignore_allowed_paths(work_dir)
    if pathspecs:
        try:
            changed_files = repo.git.diff("HEAD", "--name-only", "--", *pathspecs)
        except Exception as exc:
            # Best effort only: report the failure and use the global check below.
            log.warning("Restricted dirty check failed (%s); falling back to is_dirty().", exc)
        else:
            # Any non-blank output means at least one allowlisted path differs from HEAD.
            return bool(changed_files.strip())
    return repo.is_dirty()
+
def get_git_version(self) -> str:
"""
Return a version to identify the state of the underlying git repo.
@@ -81,6 +115,10 @@ class CustomBuild(BuilderInterface[BuilderConfig,
PluginManager]):
current branch head. Finally, a "dirty" suffix is appended to indicate
that uncommitted
changes are present.
+ The "dirty" check is restricted to paths that are part of the docker
build context (per
+ ``.dockerignore``). This avoids spurious ``.dirty`` markers on
official builds, where the
+ deny-by-default ``.dockerignore`` excludes tracked top-level files
from ``COPY``.
+
Example pre-release version:
".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65".
Example release version:
".release+2f635dc265e78db6708f59f68e8009abb92c1e65".
Example modified release version:
".release+2f635dc265e78db6708f59f68e8009abb92c1e65".dirty
@@ -106,7 +144,8 @@ class CustomBuild(BuilderInterface[BuilderConfig,
PluginManager]):
return ""
if repo:
sha = repo.head.commit.hexsha
- if repo.is_dirty():
+ work_dir = Path(self.root).parent.resolve()
+ if self._is_dirty_within_build_context(repo, work_dir):
return f".dev0+{sha}.dirty"
# commit is clean
return f".release:{sha}"