This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 16c544079da Remove '.git' folder from versions in GitDagBundle (#57069)
16c544079da is described below

commit 16c544079dab9cd5967951b4dc8076ade19f0ca8
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Tue Oct 28 15:28:54 2025 +0100

    Remove '.git' folder from versions in GitDagBundle (#57069)
    
    * Remove '.git' folder from versions in GitDagBundle
    
    To reduce storage size, this PR removes the git repo in versions when
    they are created since the git repo is not necessary on versions.
    
    * Apply suggestion from @ashb
    
    Co-authored-by: Ash Berlin-Taylor <[email protected]>
    
    * fixup! Apply suggestion from @ashb
    
    * Rename arg to prune_dotgit_folder
    
    * Add news fragment item
    
    * Fix typo
    
    * Update comment about prune_dotgit_folder
    
    ---------
    
    Co-authored-by: Ash Berlin-Taylor <[email protected]>
---
 airflow-core/newsfragments/57069.significant.rst   | 16 ++++++++
 providers/git/docs/bundles/index.rst               |  1 +
 .../git/src/airflow/providers/git/bundles/git.py   | 10 +++++
 providers/git/tests/unit/git/bundles/test_git.py   | 43 ++++++++++++++++++++++
 4 files changed, 70 insertions(+)

diff --git a/airflow-core/newsfragments/57069.significant.rst b/airflow-core/newsfragments/57069.significant.rst
new file mode 100644
index 00000000000..a4d1c22bb7a
--- /dev/null
+++ b/airflow-core/newsfragments/57069.significant.rst
@@ -0,0 +1,16 @@
+Git provider: Remove '.git' folder from versions in GitDagBundle
+
+A new option (``prune_dotgit_folder``) has been added to the GitDagBundle to remove ``.git`` from
+versioned bundles by default to reduce disk usage; set ``prune_dotgit_folder=False`` to keep
+repo metadata in the dag bundle's versions folders.
+
+* Types of change
+
+  * [ ] Dag changes
+  * [ ] Config changes
+  * [ ] API changes
+  * [ ] CLI changes
+  * [x] Behaviour changes
+  * [ ] Plugin changes
+  * [ ] Dependency changes
+  * [ ] Code interface changes
diff --git a/providers/git/docs/bundles/index.rst b/providers/git/docs/bundles/index.rst
index 4e9cb5dff22..30e6ade8989 100644
--- a/providers/git/docs/bundles/index.rst
+++ b/providers/git/docs/bundles/index.rst
@@ -35,6 +35,7 @@ Example of using the GitDagBundle:
              "subdir": "dags",
              "tracking_ref": "main",
-             "refresh_interval": 3600
+             "refresh_interval": 3600,
+             "prune_dotgit_folder": true
          }
      }
     ]'
diff --git a/providers/git/src/airflow/providers/git/bundles/git.py b/providers/git/src/airflow/providers/git/bundles/git.py
index 2bf1f9980ae..3418092f0d7 100644
--- a/providers/git/src/airflow/providers/git/bundles/git.py
+++ b/providers/git/src/airflow/providers/git/bundles/git.py
@@ -45,6 +45,12 @@ class GitDagBundle(BaseDagBundle):
     :param subdir: Subdirectory within the repository where the DAGs are stored (Optional)
     :param git_conn_id: Connection ID for SSH/token based connection to the repository (Optional)
     :param repo_url: Explicit Git repository URL to override the connection's host. (Optional)
+    :param prune_dotgit_folder: Remove .git folder from the versions after cloning.
+
+        The per-version clone is not a full "git" copy (it makes use of git's `--local` ability
+        to share the object directory via hard links), but if you have a lot of current versions
+        running, or an especially large git repo, leaving this as True will save some disk space
+        at the expense of `git` operations not working in the bundle that Tasks run from.
     """
 
     supports_versioning = True
@@ -56,6 +62,7 @@ class GitDagBundle(BaseDagBundle):
         subdir: str | None = None,
         git_conn_id: str | None = None,
         repo_url: str | None = None,
+        prune_dotgit_folder: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -68,6 +75,7 @@ class GitDagBundle(BaseDagBundle):
             self.repo_path = self.base_dir / "tracking_repo"
         self.git_conn_id = git_conn_id
         self.repo_url = repo_url
+        self.prune_dotgit_folder = prune_dotgit_folder
 
         self._log = log.bind(
             bundle_name=self.name,
@@ -115,6 +123,8 @@ class GitDagBundle(BaseDagBundle):
                     self.repo.remotes.origin.fetch()
                 self.repo.head.set_reference(str(self.repo.commit(self.version)))
                 self.repo.head.reset(index=True, working_tree=True)
+                if self.prune_dotgit_folder:
+                    shutil.rmtree(self.repo_path / ".git")
             else:
                 self.refresh()
             self.repo.close()
diff --git a/providers/git/tests/unit/git/bundles/test_git.py b/providers/git/tests/unit/git/bundles/test_git.py
index b6c110e3547..824b85ed4b3 100644
--- a/providers/git/tests/unit/git/bundles/test_git.py
+++ b/providers/git/tests/unit/git/bundles/test_git.py
@@ -165,6 +165,7 @@ class TestGitDagBundle:
             git_conn_id=CONN_HTTPS,
             version=starting_commit.hexsha,
             tracking_ref=GIT_DEFAULT_BRANCH,
+            prune_dotgit_folder=False,
         )
         bundle.initialize()
 
@@ -196,6 +197,7 @@ class TestGitDagBundle:
             git_conn_id=CONN_HTTPS,
             version="test",
             tracking_ref=GIT_DEFAULT_BRANCH,
+            prune_dotgit_folder=False,
         )
         bundle.initialize()
         assert bundle.get_current_version() == starting_commit.hexsha
@@ -225,6 +227,47 @@ class TestGitDagBundle:
 
         assert_repo_is_closed(bundle)
 
+    @mock.patch("airflow.providers.git.bundles.git.GitHook")
+    def test_removes_git_dir_for_versioned_bundle_by_default(self, mock_githook, git_repo):
+        repo_path, repo = git_repo
+        mock_githook.return_value.repo_url = repo_path
+        starting_commit = repo.head.commit
+
+        bundle = GitDagBundle(
+            name="test",
+            git_conn_id=CONN_HTTPS,
+            version=starting_commit.hexsha,
+            tracking_ref=GIT_DEFAULT_BRANCH,
+        )
+        bundle.initialize()
+
+        assert not (bundle.repo_path / ".git").exists()
+
+        files_in_repo = {f.name for f in bundle.path.iterdir() if f.is_file()}
+        assert {"test_dag.py"} == files_in_repo
+
+        assert_repo_is_closed(bundle)
+
+    @mock.patch("airflow.providers.git.bundles.git.GitHook")
+    def test_keeps_git_dir_when_disabled(self, mock_githook, git_repo):
+        repo_path, repo = git_repo
+        mock_githook.return_value.repo_url = repo_path
+        starting_commit = repo.head.commit
+
+        bundle = GitDagBundle(
+            name="test",
+            git_conn_id=CONN_HTTPS,
+            version=starting_commit.hexsha,
+            tracking_ref=GIT_DEFAULT_BRANCH,
+            prune_dotgit_folder=False,
+        )
+        bundle.initialize()
+
+        assert (bundle.repo_path / ".git").exists()
+        assert bundle.get_current_version() == starting_commit.hexsha
+
+        assert_repo_is_closed(bundle)
+
     @pytest.mark.parametrize(
         "amend",
         [

Reply via email to