milesgranger commented on code in PR #13793:
URL: https://github.com/apache/arrow/pull/13793#discussion_r937433923
##########
python/pyarrow/tests/test_io.py:
##########
@@ -126,6 +126,44 @@ def test_python_file_read():
pa.PythonFile(StringIO(), mode='r')
+@pytest.mark.parametrize("nbytes", (0, 1, 5, 100))
+@pytest.mark.parametrize("file_offset", (0, 5, 100))
+def test_python_file_get_stream(nbytes, file_offset):
+
+ data = b'data1data2data3data4data5'
+
+ f = pa.PythonFile(BytesIO(data), mode='r')
+ stream = f.get_stream(file_offset=file_offset, nbytes=nbytes)
+
+ # Subsequent calls to 'read' should match behavior if same
+ # data passed to BytesIO where get_stream should handle if
+ # nbytes/file_offset results in no bytes b/c out of bounds.
+ start = min(file_offset, len(data))
+ end = min(file_offset + nbytes, len(data))
+ buf = BytesIO(data[start:end])
+
+ # read some chunks
+ assert stream.read(nbytes=4) == buf.read(4)
+ assert stream.read(nbytes=6) == buf.read(6)
+
+ # Read to end of each stream
+ assert stream.read() == buf.read()
+
+ # Try reading passed the stream
+ n = len(data) * 2
+ assert stream.read(n) == buf.read(n)
+
+ # NativeFile[CInputStream] is not seekable
+ with pytest.raises(OSError) as e:
+ stream.seek(0)
+
+ # some error about not being seekable
+ assert e.match("seekable")
+
+ stream.close()
+ assert stream.closed
+
+
Review Comment:
A negative `file_offset` results in a `ValueError`, and it appears that a
negative `nbytes` is treated the same as requesting all the remaining data.
```python
>>> with pa.PythonFile(io.BytesIO(b'data'), mode='r') as f:
... stream = f.get_stream(file_offset=-1, nbytes=3)
... stream.read()
...
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "pyarrow/io.pxi", line 378, in pyarrow.lib.NativeFile.read
chunk = self.read(bs)
File "pyarrow/io.pxi", line 395, in pyarrow.lib.NativeFile.read
bytes_read = GetResultValue(handle.get().Read(c_nbytes, buf))
File "pyarrow/error.pxi", line 144, in
pyarrow.lib.pyarrow_internal_check_status
return check_status(status)
ValueError: negative seek value -1
>>>
>>> with pa.PythonFile(io.BytesIO(b'data'), mode='r') as f:
... stream = f.get_stream(file_offset=1, nbytes=-100)
... stream.read()
...
b'ata'
```
Should I extend the test to check that a negative `file_offset` raises a
`ValueError` and that a negative `nbytes` returns all the remaining data?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]