This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new bc1df029af1 Validate GCSToSambaOperator destination path stays within destination_path (#67857)
bc1df029af1 is described below

commit bc1df029af15cb1d35d5ca0d33bf9235500137cc
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue Jun 2 20:08:49 2026 +0200

    Validate GCSToSambaOperator destination path stays within destination_path (#67857)
    
    GCS object names are read from the source bucket and may contain ".."
    segments. GCSToSambaOperator._resolve_destination_path joined the object
    name onto the configured destination_path without normalisation, so a
    crafted object name could resolve an SMB write target outside the intended
    directory. Normalise the resolved path and refuse to write when it falls
    outside destination_path.
    
    Generated-by: Claude Opus 4.8 (1M context)
---
 .../providers/samba/transfers/gcs_to_samba.py      | 12 +++++++-
 .../unit/samba/transfers/test_gcs_to_samba.py      | 32 ++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py b/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
index 5816b38c113..950337438ea 100644
--- a/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
+++ b/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
@@ -177,7 +177,17 @@ class GCSToSambaOperator(BaseOperator):
                 source_object = os.path.relpath(source_object, start=prefix)
             else:
                 source_object = os.path.basename(source_object)
-        return os.path.join(self.destination_path, source_object)
+        # Source object names come from the GCS bucket and may contain ".." segments.
+        # Normalize the joined path and make sure it stays within destination_path so a
+        # crafted object name cannot resolve a write target outside the configured directory.
+        resolved = os.path.normpath(os.path.join(self.destination_path, source_object))
+        base = os.path.normpath(self.destination_path)
+        if resolved != base and not resolved.startswith(base + os.sep):
+            raise ValueError(
+                f"Resolved destination path {resolved!r} is outside the configured "
+                f"destination_path {base!r}; refusing to write outside it."
+            )
+        return resolved
 
     def _copy_single_object(
         self,
diff --git a/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py b/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
index 45e518e401c..97fb25dcc7c 100644
--- a/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
+++ b/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
@@ -370,3 +370,35 @@ class TestGoogleCloudStorageToSambaOperator:
         )
         with pytest.raises(AirflowException):
             operator.execute(None)
+
+    @pytest.mark.parametrize(
+        "source_object",
+        [
+            "../../victim_area/payload",
+            "../escape",
+            "subdir/../../escape",
+        ],
+    )
+    def test_resolve_destination_path_rejects_traversal(self, source_object):
+        operator = GCSToSambaOperator(
+            task_id=TASK_ID,
+            source_bucket=TEST_BUCKET,
+            source_object=source_object,
+            destination_path=DESTINATION_SMB,
+            gcp_conn_id=GCP_CONN_ID,
+            samba_conn_id=SAMBA_CONN_ID,
+        )
+        with pytest.raises(ValueError, match="outside the configured"):
+            operator._resolve_destination_path(source_object)
+
+    def test_resolve_destination_path_allows_contained_object(self):
+        operator = GCSToSambaOperator(
+            task_id=TASK_ID,
+            source_bucket=TEST_BUCKET,
+            source_object="dir/file.txt",
+            destination_path=DESTINATION_SMB,
+            gcp_conn_id=GCP_CONN_ID,
+            samba_conn_id=SAMBA_CONN_ID,
+        )
+        resolved = operator._resolve_destination_path("dir/file.txt")
+        assert resolved == os.path.join(DESTINATION_SMB, "dir/file.txt")

Reply via email to