This is an automated email from the ASF dual-hosted git repository.

Fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new a99320bf fix(fsspec): handle zero-byte files in `__len__` (#3353)
a99320bf is described below

commit a99320bf8bfe8c81415dc6ff2148d4f5a23067e5
Author: Minh Vu <[email protected]>
AuthorDate: Fri May 15 21:10:00 2026 +0200

    fix(fsspec): handle zero-byte files in `__len__` (#3353)
    
    ## Summary
    
    Fix `FsspecInputFile.__len__` and `FsspecOutputFile.__len__` so
    zero-byte files return `0` instead of being treated as missing metadata.
    
    Both methods previously used truthiness checks on
    `object_info.get(...)`, which caused valid sizes like `0` to fall
    through to the runtime error path.
    
    ## Changes
    
    - check for `Size` key presence explicitly
    - check for `size` key presence explicitly
    - add a regression test covering zero-byte lengths for both metadata key
    variants
    
    ## Verification
    
    - `python -m pytest tests/io/test_fsspec.py -k zero_length_of_file -q`
---
 pyiceberg/io/fsspec.py  | 16 ++++++++--------
 tests/io/test_fsspec.py | 14 ++++++++++++++
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index 63ec55ba..92255cb8 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -335,10 +335,10 @@ class FsspecInputFile(InputFile):
     def __len__(self) -> int:
         """Return the total length of the file, in bytes."""
         object_info = self._fs.info(self.location)
-        if size := object_info.get("Size"):
-            return size
-        elif size := object_info.get("size"):
-            return size
+        if "Size" in object_info:
+            return object_info["Size"]
+        elif "size" in object_info:
+            return object_info["size"]
         raise RuntimeError(f"Cannot retrieve object info: {self.location}")
 
     def exists(self) -> bool:
@@ -379,10 +379,10 @@ class FsspecOutputFile(OutputFile):
     def __len__(self) -> int:
         """Return the total length of the file, in bytes."""
         object_info = self._fs.info(self.location)
-        if size := object_info.get("Size"):
-            return size
-        elif size := object_info.get("size"):
-            return size
+        if "Size" in object_info:
+            return object_info["Size"]
+        elif "size" in object_info:
+            return object_info["size"]
         raise RuntimeError(f"Cannot retrieve object info: {self.location}")
 
     def exists(self) -> bool:
diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py
index bb11fdd7..8739a596 100644
--- a/tests/io/test_fsspec.py
+++ b/tests/io/test_fsspec.py
@@ -146,6 +146,20 @@ def test_fsspec_getting_length_of_file(fsspec_fileio: FsspecFileIO) -> None:
     fsspec_fileio.delete(output_file)
 
 
+@pytest.mark.parametrize("size_key", ["Size", "size"])
+def test_fsspec_getting_zero_length_of_file(size_key: str) -> None:
+    """Test getting zero-byte lengths from object metadata."""
+    location = "s3://warehouse/empty-file"
+    fs = mock.Mock(spec=AbstractFileSystem)
+    fs.info.return_value = {size_key: 0}
+
+    output_file = fsspec.FsspecOutputFile(location=location, fs=fs)
+    assert len(output_file) == 0
+
+    input_file = fsspec.FsspecInputFile(location=location, fs=fs)
+    assert len(input_file) == 0
+
+
 @pytest.mark.s3
 def test_fsspec_file_tell(fsspec_fileio: FsspecFileIO) -> None:
     """Test finding cursor position for an fsspec file-io file"""

Reply via email to