This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3fe68e79b06 Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True (#61847)
3fe68e79b06 is described below

commit 3fe68e79b0651959c4ddfbd4a06c101831dcff80
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Mon Feb 16 08:16:46 2026 +0100

    Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True (#61847)
    
    * Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True
    
    When a version directory already existed without a .git folder, workers treated
    it as invalid and deleted it then re-cloned. Tasks using the same bundle version
    would trigger a full clone on every run.
    
    Detect an existing pruned worktree and reuse it instead of cloning again
    
    * fixup! Fix GitDagBundle re-cloning on every task when prune_dotgit_folder is True
---
 .../git/src/airflow/providers/git/bundles/git.py   | 24 ++++++++++++-
 providers/git/tests/unit/git/bundles/test_git.py   | 42 ++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py
index 1f0b0c56f6a..9b3e873cd4c 100644
--- a/providers/git/src/airflow/providers/git/bundles/git.py
+++ b/providers/git/src/airflow/providers/git/bundles/git.py
@@ -115,8 +115,25 @@ class GitDagBundle(BaseDagBundle):
             self.repo_url = self.hook.repo_url
             self._log.debug("repo_url updated from hook")
 
+    def _is_pruned_worktree(self) -> bool:
+        # True if version path exists and has no .git
+        if not self.version:
+            return False
+        if not self.repo_path.exists() or not self.repo_path.is_dir():
+            return False
+        return not (self.repo_path / ".git").exists()
+
     def _initialize(self):
         with self.lock():
+            # Avoids re-cloning on every task run when prune_dotgit_folder=True.
+            if self._is_pruned_worktree():
+                self._log.debug(
+                    "Using existing pruned worktree",
+                    repo_path=self.repo_path,
+                    version=self.version,
+                )
+                return
+
             cm = self.hook.configure_hook_env() if self.hook else nullcontext()
             with cm:
                 try:
@@ -151,11 +168,14 @@ class GitDagBundle(BaseDagBundle):
                             raise RuntimeError("Error pulling submodule from repository") from e
 
                 if self.prune_dotgit_folder:
+                    self.repo.close()
                     shutil.rmtree(self.repo_path / ".git")
+                    self.repo = None
             else:
                 self.refresh()
 
-            self.repo.close()
+            if self.repo is not None:
+                self.repo.close()
 
     def initialize(self) -> None:
         if not self.repo_url:
@@ -246,6 +266,8 @@ class GitDagBundle(BaseDagBundle):
         )
 
     def get_current_version(self) -> str:
+        if self.version is not None and getattr(self, "repo", None) is None:
+            return self.version
         with self.repo as repo:
             return repo.head.commit.hexsha
 
diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py
index 5b64b3db33d..ade0c306755 100644
--- a/providers/git/tests/unit/git/bundles/test_git.py
+++ b/providers/git/tests/unit/git/bundles/test_git.py
@@ -67,6 +67,9 @@ def git_repo(tmp_path_factory):
 
 
 def assert_repo_is_closed(bundle: GitDagBundle):
+    # When .git was pruned, repo is cleared and there is nothing to close
+    if getattr(bundle, "repo", None) is None:
+        return
     # cat-file processes get left around if the repo is not closed, so check it was
     assert bundle.repo.git.cat_file_all is None
     assert bundle.bare_repo.git.cat_file_all is None
@@ -266,6 +269,45 @@ class TestGitDagBundle:
 
         assert_repo_is_closed(bundle)
 
+    @mock.patch("airflow.providers.git.bundles.git.GitHook")
+    def test_second_initialize_reuses_pruned_worktree_without_recloning(self, mock_githook, git_repo):
+        """When version path exists without .git (pruned), second initialize() uses it and does not re-clone."""
+        repo_path, repo = git_repo
+        mock_githook.return_value.repo_url = repo_path
+        starting_commit = repo.head.commit
+        version = starting_commit.hexsha
+        bundle_name = "test_pruned_reuse"
+
+        # First init: clone and prune (default)
+        bundle1 = GitDagBundle(
+            name=bundle_name,
+            git_conn_id=CONN_HTTPS,
+            version=version,
+            tracking_ref=GIT_DEFAULT_BRANCH,
+            prune_dotgit_folder=True,
+        )
+        bundle1.initialize()
+        assert not (bundle1.repo_path / ".git").exists()
+        assert bundle1.get_current_version() == version
+        version_path = bundle1.repo_path
+
+        # Second init: same name and version; should detect pruned worktree and skip clone
+        with patch.object(GitDagBundle, "_clone_repo_if_required") as mock_clone:
+            bundle2 = GitDagBundle(
+                name=bundle_name,
+                git_conn_id=CONN_HTTPS,
+                version=version,
+                tracking_ref=GIT_DEFAULT_BRANCH,
+                prune_dotgit_folder=True,
+            )
+            bundle2.initialize()
+            mock_clone.assert_not_called()
+
+        assert bundle2.repo_path == version_path
+        assert bundle2.get_current_version() == version
+        files_in_repo = {f.name for f in bundle2.path.iterdir() if f.is_file()}
+        assert {"test_dag.py"} == files_in_repo
+
     @pytest.mark.parametrize(
         "amend",
         [

Reply via email to