This is an automated email from the ASF dual-hosted git repository.
aicam pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 976cdae77b fix: raise when presigned URL response is missing or
malformed (#5073)
976cdae77b is described below
commit 976cdae77b63312152b7566caa89ea2557d8965f
Author: Matthew B. <[email protected]>
AuthorDate: Tue May 19 21:43:58 2026 -0700
fix: raise when presigned URL response is missing or malformed (#5073)
### What changes were proposed in this PR?
`DatasetFileDocument.get_presigned_url` previously returned
`response.json().get("presignedUrl")` on a 200 response, so a body that
omitted the key silently returned `None`, and a body that wasn't valid
JSON surfaced as a raw JSON decoding error. This PR parses the JSON inside
a try/except block and validates that `presignedUrl` is a non-empty
string; otherwise, it raises `RuntimeError` with the response body.
### Any related issues, documentation, or discussions?
Closes: #4725
### How was this PR tested?
Manually reproduced the missing-key case (mocked 200 + empty JSON) and
confirmed the new `RuntimeError` is raised at the presign step instead
of a downstream `requests.get(None)` failure.
### Was this PR authored or co-authored using generative AI tooling?
Co-authored with Claude Opus 4.7 in compliance with the ASF Generative Tooling Guidance.
---------
Co-authored-by: ali risheh <[email protected]>
---
.../pytexera/storage/dataset_file_document.py | 16 ++++++++++-
.../pytexera/storage/test_dataset_file_document.py | 32 +++++++++++++++++++---
2 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py
b/amber/src/main/python/pytexera/storage/dataset_file_document.py
index 3d07773583..31f95d3fc7 100644
--- a/amber/src/main/python/pytexera/storage/dataset_file_document.py
+++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py
@@ -76,7 +76,21 @@ class DatasetFileDocument:
f"Failed to get presigned URL: {response.status_code}
{response.text}"
)
- return response.json().get("presignedUrl")
+ try:
+ payload = response.json()
+ except ValueError as e:
+ raise RuntimeError(
+ f"Failed to get presigned URL: invalid JSON response:
{response.text}"
+ ) from e
+
+ presigned_url = payload.get("presignedUrl")
+ if not isinstance(presigned_url, str) or not presigned_url:
+ raise RuntimeError(
+ f"Failed to get presigned URL: 'presignedUrl' missing from "
+ f"response: {response.text}"
+ )
+
+ return presigned_url
def read_file(self) -> io.BytesIO:
"""
diff --git
a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
index ecf9dd5b8c..680f512072 100644
--- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
+++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
@@ -137,13 +137,37 @@ class TestGetPresignedUrl:
with pytest.raises(RuntimeError, match=r"403.*forbidden"):
doc.get_presigned_url()
- def test_returns_none_when_response_body_lacks_presigned_url_key(self,
monkeypatch):
- # Pins current behavior: a 200 with no "presignedUrl" key yields None
- # rather than raising. read_file() will then call requests.get(None).
+ def test_raises_when_response_body_lacks_presigned_url_key(self,
monkeypatch):
doc = self._make_doc(monkeypatch)
with patch("pytexera.storage.dataset_file_document.requests.get") as
mock_get:
mock_get.return_value = make_response(200, body={"other": "value"})
- assert doc.get_presigned_url() is None
+ with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+ doc.get_presigned_url()
+
+ def test_raises_when_response_body_is_not_valid_json(self, monkeypatch):
+ doc = self._make_doc(monkeypatch)
+ with patch("pytexera.storage.dataset_file_document.requests.get") as
mock_get:
+ response = MagicMock()
+ response.status_code = 200
+ response.json.side_effect = ValueError("Expecting value")
+ response.text = "<html>not json</html>"
+ mock_get.return_value = response
+ with pytest.raises(RuntimeError, match="invalid JSON response"):
+ doc.get_presigned_url()
+
+ def test_raises_when_presigned_url_is_empty_string(self, monkeypatch):
+ doc = self._make_doc(monkeypatch)
+ with patch("pytexera.storage.dataset_file_document.requests.get") as
mock_get:
+ mock_get.return_value = make_response(200, body={"presignedUrl":
""})
+ with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+ doc.get_presigned_url()
+
+ def test_raises_when_presigned_url_is_not_a_string(self, monkeypatch):
+ doc = self._make_doc(monkeypatch)
+ with patch("pytexera.storage.dataset_file_document.requests.get") as
mock_get:
+ mock_get.return_value = make_response(200, body={"presignedUrl":
None})
+ with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+ doc.get_presigned_url()
class TestReadFile: