This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new bc1df029af1 Validate GCSToSambaOperator destination path stays within
destination_path (#67857)
bc1df029af1 is described below
commit bc1df029af15cb1d35d5ca0d33bf9235500137cc
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue Jun 2 20:08:49 2026 +0200
Validate GCSToSambaOperator destination path stays within destination_path
(#67857)
GCS object names are read from the source bucket and may contain ".."
segments. GCSToSambaOperator._resolve_destination_path joined the object
name onto the configured destination_path without normalisation, so a
crafted object name could resolve to an SMB write target outside the
intended directory. Normalise the joined path and raise a ValueError
instead of writing when the result falls outside destination_path.
Generated-by: Claude Opus 4.8 (1M context)
---
.../providers/samba/transfers/gcs_to_samba.py | 12 +++++++-
.../unit/samba/transfers/test_gcs_to_samba.py | 32 ++++++++++++++++++++++
2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py b/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
index 5816b38c113..950337438ea 100644
--- a/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
+++ b/providers/samba/src/airflow/providers/samba/transfers/gcs_to_samba.py
@@ -177,7 +177,17 @@ class GCSToSambaOperator(BaseOperator):
source_object = os.path.relpath(source_object, start=prefix)
else:
source_object = os.path.basename(source_object)
- return os.path.join(self.destination_path, source_object)
+ # Source object names come from the GCS bucket and may contain ".." segments.
+ # Normalize the joined path and make sure it stays within destination_path so a
+ # crafted object name cannot resolve a write target outside the configured directory.
+ resolved = os.path.normpath(os.path.join(self.destination_path,
source_object))
+ base = os.path.normpath(self.destination_path)
+ if resolved != base and not resolved.startswith(base + os.sep):
+ raise ValueError(
+ f"Resolved destination path {resolved!r} is outside the
configured "
+ f"destination_path {base!r}; refusing to write outside it."
+ )
+ return resolved
def _copy_single_object(
self,
diff --git a/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py b/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
index 45e518e401c..97fb25dcc7c 100644
--- a/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
+++ b/providers/samba/tests/unit/samba/transfers/test_gcs_to_samba.py
@@ -370,3 +370,35 @@ class TestGoogleCloudStorageToSambaOperator:
)
with pytest.raises(AirflowException):
operator.execute(None)
+
+ @pytest.mark.parametrize(
+ "source_object",
+ [
+ "../../victim_area/payload",
+ "../escape",
+ "subdir/../../escape",
+ ],
+ )
+ def test_resolve_destination_path_rejects_traversal(self, source_object):
+ operator = GCSToSambaOperator(
+ task_id=TASK_ID,
+ source_bucket=TEST_BUCKET,
+ source_object=source_object,
+ destination_path=DESTINATION_SMB,
+ gcp_conn_id=GCP_CONN_ID,
+ samba_conn_id=SAMBA_CONN_ID,
+ )
+ with pytest.raises(ValueError, match="outside the configured"):
+ operator._resolve_destination_path(source_object)
+
+ def test_resolve_destination_path_allows_contained_object(self):
+ operator = GCSToSambaOperator(
+ task_id=TASK_ID,
+ source_bucket=TEST_BUCKET,
+ source_object="dir/file.txt",
+ destination_path=DESTINATION_SMB,
+ gcp_conn_id=GCP_CONN_ID,
+ samba_conn_id=SAMBA_CONN_ID,
+ )
+ resolved = operator._resolve_destination_path("dir/file.txt")
+ assert resolved == os.path.join(DESTINATION_SMB, "dir/file.txt")