This is an automated email from the ASF dual-hosted git repository.

aicam pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 976cdae77b fix: raise when presigned URL response is missing or malformed (#5073)
976cdae77b is described below

commit 976cdae77b63312152b7566caa89ea2557d8965f
Author: Matthew B. <[email protected]>
AuthorDate: Tue May 19 21:43:58 2026 -0700

    fix: raise when presigned URL response is missing or malformed (#5073)
    
    ### What changes were proposed in this PR?
    `DatasetFileDocument.get_presigned_url` previously returned
    `response.json().get("presignedUrl")` on a 200 response, so a body that
    omitted the key (or wasn't valid JSON) silently returned `None`. This PR
    parses the JSON inside a try/except block and validates that
    `presignedUrl` is a non-empty string; otherwise, it raises
    `RuntimeError` with the response body.
    
    ### Any related issues, documentation, or discussions?
    Closes: #4725
    
    ### How was this PR tested?
    Manually reproduced the missing-key case (mocked 200 + empty JSON) and
    confirmed the new `RuntimeError` is raised at the presign step instead
    of a downstream `requests.get(None)` failure.
    
    ### Was this PR authored or co-authored using generative AI tooling?
    Co-authored with Claude Opus 4.7 in compliance with the ASF Generative
    Tooling Guidance.
    
    ---------
    
    Co-authored-by: ali risheh <[email protected]>
---
 .../pytexera/storage/dataset_file_document.py      | 16 ++++++++++-
 .../pytexera/storage/test_dataset_file_document.py | 32 +++++++++++++++++++---
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py b/amber/src/main/python/pytexera/storage/dataset_file_document.py
index 3d07773583..31f95d3fc7 100644
--- a/amber/src/main/python/pytexera/storage/dataset_file_document.py
+++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py
@@ -76,7 +76,21 @@ class DatasetFileDocument:
                 f"Failed to get presigned URL: {response.status_code} 
{response.text}"
             )
 
-        return response.json().get("presignedUrl")
+        try:
+            payload = response.json()
+        except ValueError as e:
+            raise RuntimeError(
+                f"Failed to get presigned URL: invalid JSON response: 
{response.text}"
+            ) from e
+
+        presigned_url = payload.get("presignedUrl")
+        if not isinstance(presigned_url, str) or not presigned_url:
+            raise RuntimeError(
+                f"Failed to get presigned URL: 'presignedUrl' missing from "
+                f"response: {response.text}"
+            )
+
+        return presigned_url
 
     def read_file(self) -> io.BytesIO:
         """
diff --git a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
index ecf9dd5b8c..680f512072 100644
--- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
+++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py
@@ -137,13 +137,37 @@ class TestGetPresignedUrl:
             with pytest.raises(RuntimeError, match=r"403.*forbidden"):
                 doc.get_presigned_url()
 
-    def test_returns_none_when_response_body_lacks_presigned_url_key(self, 
monkeypatch):
-        # Pins current behavior: a 200 with no "presignedUrl" key yields None
-        # rather than raising. read_file() will then call requests.get(None).
+    def test_raises_when_response_body_lacks_presigned_url_key(self, 
monkeypatch):
         doc = self._make_doc(monkeypatch)
         with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
             mock_get.return_value = make_response(200, body={"other": "value"})
-            assert doc.get_presigned_url() is None
+            with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+                doc.get_presigned_url()
+
+    def test_raises_when_response_body_is_not_valid_json(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            response = MagicMock()
+            response.status_code = 200
+            response.json.side_effect = ValueError("Expecting value")
+            response.text = "<html>not json</html>"
+            mock_get.return_value = response
+            with pytest.raises(RuntimeError, match="invalid JSON response"):
+                doc.get_presigned_url()
+
+    def test_raises_when_presigned_url_is_empty_string(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
""})
+            with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+                doc.get_presigned_url()
+
+    def test_raises_when_presigned_url_is_not_a_string(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
None})
+            with pytest.raises(RuntimeError, match="'presignedUrl' missing"):
+                doc.get_presigned_url()
 
 
 class TestReadFile:

Reply via email to