This is an automated email from the ASF dual-hosted git repository.
Fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new a99320bf fix(fsspec): handle zero-byte files in `__len__` (#3353)
a99320bf is described below
commit a99320bf8bfe8c81415dc6ff2148d4f5a23067e5
Author: Minh Vu <[email protected]>
AuthorDate: Fri May 15 21:10:00 2026 +0200
fix(fsspec): handle zero-byte files in `__len__` (#3353)
## Summary
Fix `FsspecInputFile.__len__` and `FsspecOutputFile.__len__` so
zero-byte files return `0` instead of being treated as missing metadata.
Both methods previously used truthiness checks on
`object_info.get(...)`, which caused valid sizes like `0` to fall
through to the runtime error path.
## Changes
- check for `Size` key presence explicitly
- check for `size` key presence explicitly
- add a regression test covering zero-byte lengths for both metadata key
variants
## Verification
- `python -m pytest tests/io/test_fsspec.py -k zero_length_of_file -q`
---
pyiceberg/io/fsspec.py | 16 ++++++++--------
tests/io/test_fsspec.py | 14 ++++++++++++++
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index 63ec55ba..92255cb8 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -335,10 +335,10 @@ class FsspecInputFile(InputFile):
def __len__(self) -> int:
"""Return the total length of the file, in bytes."""
object_info = self._fs.info(self.location)
- if size := object_info.get("Size"):
- return size
- elif size := object_info.get("size"):
- return size
+ if "Size" in object_info:
+ return object_info["Size"]
+ elif "size" in object_info:
+ return object_info["size"]
raise RuntimeError(f"Cannot retrieve object info: {self.location}")
def exists(self) -> bool:
@@ -379,10 +379,10 @@ class FsspecOutputFile(OutputFile):
def __len__(self) -> int:
"""Return the total length of the file, in bytes."""
object_info = self._fs.info(self.location)
- if size := object_info.get("Size"):
- return size
- elif size := object_info.get("size"):
- return size
+ if "Size" in object_info:
+ return object_info["Size"]
+ elif "size" in object_info:
+ return object_info["size"]
raise RuntimeError(f"Cannot retrieve object info: {self.location}")
def exists(self) -> bool:
diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py
index bb11fdd7..8739a596 100644
--- a/tests/io/test_fsspec.py
+++ b/tests/io/test_fsspec.py
@@ -146,6 +146,20 @@ def test_fsspec_getting_length_of_file(fsspec_fileio: FsspecFileIO) -> None:
fsspec_fileio.delete(output_file)
+@pytest.mark.parametrize("size_key", ["Size", "size"])
+def test_fsspec_getting_zero_length_of_file(size_key: str) -> None:
+ """Test getting zero-byte lengths from object metadata."""
+ location = "s3://warehouse/empty-file"
+ fs = mock.Mock(spec=AbstractFileSystem)
+ fs.info.return_value = {size_key: 0}
+
+ output_file = fsspec.FsspecOutputFile(location=location, fs=fs)
+ assert len(output_file) == 0
+
+ input_file = fsspec.FsspecInputFile(location=location, fs=fs)
+ assert len(input_file) == 0
+
+
@pytest.mark.s3
def test_fsspec_file_tell(fsspec_fileio: FsspecFileIO) -> None:
"""Test finding cursor position for an fsspec file-io file"""