This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new f32991b8022 Restrict reproducible-build dirty check to dockerignore-allowed paths (#67663)
f32991b8022 is described below

commit f32991b8022348f1bc363212c0f09bd3bc479a8b
Author: Jarek Potiuk <[email protected]>
AuthorDate: Thu Jun 4 15:36:25 2026 +0200

    Restrict reproducible-build dirty check to dockerignore-allowed paths (#67663)
    
    Official airflow-core builds use a deny-by-default `.dockerignore` plus
    an explicit allowlist (`!path`) and run inside a docker container that
    sees only the allowlisted files. Inside the container, the global
    `repo.is_dirty()` check flags every tracked-but-not-COPYed file as a
    deletion (~220 files, ~30k lines), so every clean-tag release ships with
    a misleading `git_version = .dev0+<SHA>.dirty` instead of `.release:<SHA>`.
    
    Restrict the dirty check to paths in the dockerignore allowlist
    (parsed at build time so it stays in sync) so a clean tag produces a
    `.release:<SHA>` git_version. Falls back to the previous global
    `is_dirty()` when `.dockerignore` is absent, contains no `!path`
    allowlist entries, or the restricted `git diff` call fails.
---
 airflow-core/hatch_build.py | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/airflow-core/hatch_build.py b/airflow-core/hatch_build.py
index e199d4b70d3..5971b86e91b 100644
--- a/airflow-core/hatch_build.py
+++ b/airflow-core/hatch_build.py
@@ -71,6 +71,40 @@ class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]):
         dist_path = Path(self.root) / "src" / "airflow" / "ui" / "dist"
         return dist_path.resolve().as_posix()
 
+    @staticmethod
+    def _dockerignore_allowed_paths(work_dir: Path) -> list[str] | None:
+        # The repo's ``.dockerignore`` follows a deny-by-default pattern (``**``)
+        # plus an explicit allowlist of ``!path`` entries. When the package is
+        # built inside the docker image (``COPY . /opt/airflow``), only the
+        # allowlisted files reach the container, so paths outside the allowlist
+        # are guaranteed to be absent and would otherwise be reported as
+        # ``deleted`` by ``git diff`` — stamping every release as ``.dirty``.
+        # Parse the allowlist at build time so this stays in sync with the file.
+        dockerignore = work_dir / ".dockerignore"
+        if not dockerignore.exists():
+            return None
+        allowed: list[str] = []
+        for raw_line in dockerignore.read_text().splitlines():
+            line = raw_line.strip()
+            if not line or line.startswith("#") or not line.startswith("!"):
+                continue
+            path = line[1:].strip().rstrip("/")
+            if path:
+                allowed.append(path)
+        return allowed or None
+
+    @classmethod
+    def _is_dirty_within_build_context(cls, repo: Any, work_dir: Path) -> bool:
+        allowed_paths = cls._dockerignore_allowed_paths(work_dir)
+        if not allowed_paths:
+            return repo.is_dirty()
+        try:
+            diff = repo.git.diff("HEAD", "--name-only", "--", *allowed_paths)
+        except Exception as exc:
+            log.warning("Restricted dirty check failed (%s); falling back to is_dirty().", exc)
+            return repo.is_dirty()
+        return bool(diff.strip())
+
     def get_git_version(self) -> str:
         """
         Return a version to identify the state of the underlying git repo.
@@ -81,6 +115,10 @@ class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]):
         current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted
         changes are present.
 
+        The "dirty" check is restricted to paths that are part of the docker build context (per
+        ``.dockerignore``). This avoids spurious ``.dirty`` markers on official builds, where the
+        deny-by-default ``.dockerignore`` excludes tracked top-level files from ``COPY``.
+
         Example pre-release version: ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65".
         Example release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65".
         Example modified release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65".dirty
@@ -106,7 +144,8 @@ class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]):
             return ""
         if repo:
             sha = repo.head.commit.hexsha
-            if repo.is_dirty():
+            work_dir = Path(self.root).parent.resolve()
+            if self._is_dirty_within_build_context(repo, work_dir):
                 return f".dev0+{sha}.dirty"
             # commit is clean
             return f".release:{sha}"

Reply via email to