This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 16547dfef2e Validate downloaded paths stay within the destination
directory in SFTPHook.retrieve_directory (#67985)
16547dfef2e is described below
commit 16547dfef2e09c3f56fa5af978d075ce1ca65fb6
Author: Jarek Potiuk <[email protected]>
AuthorDate: Thu Jun 4 15:33:34 2026 +0200
Validate downloaded paths stay within the destination directory in
SFTPHook.retrieve_directory (#67985)
SFTPHook.retrieve_directory and retrieve_directory_concurrently build each
local destination path by joining the local directory with a path derived
from directory-entry names returned by the remote SFTP server. Because those
names can contain ".." components, the recursive download could write
outside
the configured local destination directory.
Add a containment check (_validate_within_directory) that resolves each
computed local path and refuses to write when it falls outside the
destination directory, applied to both the serial and concurrent retrieval
paths.
Generated-by: Claude Opus 4.8 (1M context) following the guidelines at
https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions
---
.../sftp/src/airflow/providers/sftp/hooks/sftp.py | 37 +++++++++++++++++++---
providers/sftp/tests/unit/sftp/hooks/test_sftp.py | 23 ++++++++++++++
2 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/providers/sftp/src/airflow/providers/sftp/hooks/sftp.py
b/providers/sftp/src/airflow/providers/sftp/hooks/sftp.py
index 2f31c66d3b2..bb272348974 100644
--- a/providers/sftp/src/airflow/providers/sftp/hooks/sftp.py
+++ b/providers/sftp/src/airflow/providers/sftp/hooks/sftp.py
@@ -384,6 +384,25 @@ class SFTPHook(SSHHook):
"""
self.conn.remove(path) # type: ignore[arg-type, union-attr]
+ @staticmethod
+ def _validate_within_directory(base_dir: str, candidate: str) -> str:
+ """
+ Ensure ``candidate`` resolves to a path inside ``base_dir``.
+
+ Directory-entry names are returned by the remote SFTP server and may
+ contain ``..`` components; joining them into the local destination path
+ could otherwise write outside it. Containment is verified before any
+ local write or ``mkdir``.
+ """
+ base_real = os.path.realpath(base_dir)
+ candidate_real = os.path.realpath(candidate)
+ if candidate_real != base_real and os.path.commonpath([base_real,
candidate_real]) != base_real:
+ raise ValueError(
+ f"Refusing to write outside the destination directory: "
+ f"{candidate!r} resolves outside {base_dir!r}"
+ )
+ return candidate
+
def retrieve_directory(self, remote_full_path: str, local_full_path: str,
prefetch: bool = True) -> None:
"""
Transfer the remote directory to a local location.
@@ -400,10 +419,14 @@ class SFTPHook(SSHHook):
Path(local_full_path).mkdir(parents=True)
files, dirs, _ = self.get_tree_map(remote_full_path)
for dir_path in dirs:
- new_local_path = os.path.join(local_full_path,
os.path.relpath(dir_path, remote_full_path))
+ new_local_path = self._validate_within_directory(
+ local_full_path, os.path.join(local_full_path,
os.path.relpath(dir_path, remote_full_path))
+ )
Path(new_local_path).mkdir(parents=True, exist_ok=True)
for file_path in files:
- new_local_path = os.path.join(local_full_path,
os.path.relpath(file_path, remote_full_path))
+ new_local_path = self._validate_within_directory(
+ local_full_path, os.path.join(local_full_path,
os.path.relpath(file_path, remote_full_path))
+ )
self.retrieve_file(file_path, new_local_path, prefetch)
def retrieve_directory_concurrently(
@@ -438,12 +461,18 @@ class SFTPHook(SSHHook):
new_local_file_paths, remote_file_paths = [], []
files, dirs, _ = self.get_tree_map(remote_full_path)
for dir_path in dirs:
- new_local_path = os.path.join(local_full_path,
os.path.relpath(dir_path, remote_full_path))
+ new_local_path = self._validate_within_directory(
+ local_full_path,
+ os.path.join(local_full_path, os.path.relpath(dir_path,
remote_full_path)),
+ )
Path(new_local_path).mkdir(parents=True, exist_ok=True)
for file in files:
remote_file_paths.append(file)
new_local_file_paths.append(
- os.path.join(local_full_path, os.path.relpath(file,
remote_full_path))
+ self._validate_within_directory(
+ local_full_path,
+ os.path.join(local_full_path, os.path.relpath(file,
remote_full_path)),
+ )
)
remote_file_chunks = [remote_file_paths[i::workers] for i in
range(workers)]
local_file_chunks = [new_local_file_paths[i::workers] for i in
range(workers)]
diff --git a/providers/sftp/tests/unit/sftp/hooks/test_sftp.py
b/providers/sftp/tests/unit/sftp/hooks/test_sftp.py
index 902a191041b..41ab0878ec4 100644
--- a/providers/sftp/tests/unit/sftp/hooks/test_sftp.py
+++ b/providers/sftp/tests/unit/sftp/hooks/test_sftp.py
@@ -633,6 +633,29 @@ class TestSFTPHook:
)
assert retrieved_dir_name in os.listdir(os.path.join(self.temp_dir,
TMP_DIR_FOR_TESTS))
+ def test_validate_within_directory_rejects_escape(self):
+ base = os.path.join(self.temp_dir, "download")
+ with pytest.raises(ValueError, match="outside the destination
directory"):
+ SFTPHook._validate_within_directory(base, os.path.join(base, "..",
"victim"))
+ # An in-bounds candidate is returned unchanged.
+ inside = os.path.join(base, "sub", "file")
+ assert SFTPHook._validate_within_directory(base, inside) == inside
+
+ def test_retrieve_directory_rejects_server_path_traversal(self):
+ # A remote SFTP server can return a directory-entry name containing
".."
+ # so the recursive download would escape the local destination
directory.
+ remote = "/srv/export"
+ local = os.path.join(self.temp_dir, "download_traversal")
+ escaping_file = "/srv/export/../victim/payload"
+ with (
+ patch.object(SFTPHook, "get_tree_map",
return_value=([escaping_file], [], [])),
+ patch.object(SFTPHook, "retrieve_file") as mock_retrieve,
+ ):
+ with pytest.raises(ValueError, match="outside the destination
directory"):
+ self.hook.retrieve_directory(remote_full_path=remote,
local_full_path=local)
+ mock_retrieve.assert_not_called()
+ assert not os.path.exists(os.path.join(self.temp_dir, "victim"))
+
@patch("paramiko.SSHClient")
@patch("paramiko.ProxyCommand")
@patch("airflow.providers.sftp.hooks.sftp.SFTPHook.get_connection")