[ https://issues.apache.org/jira/browse/ARROW-2155?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16365430#comment-16365430 ]
ASF GitHub Bot commented on ARROW-2155: --------------------------------------- pitrou closed pull request #1607: ARROW-2155: [Python] frombuffer() should respect mutability of argument URL: https://github.com/apache/arrow/pull/1607 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc index 14a8ae6fd..2699d51ba 100644 --- a/cpp/src/arrow/python/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -23,6 +23,7 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" +#include "arrow/util/logging.h" namespace arrow { namespace py { @@ -47,20 +48,25 @@ MemoryPool* get_memory_pool() { // ---------------------------------------------------------------------- // PyBuffer -PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) { - if (PyObject_CheckBuffer(obj)) { - obj_ = PyMemoryView_FromObject(obj); - Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_); - data_ = reinterpret_cast<const uint8_t*>(buffer->buf); - size_ = buffer->len; - capacity_ = buffer->len; - is_mutable_ = false; +PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0) { + if (!PyObject_GetBuffer(obj, &py_buf_, PyBUF_ANY_CONTIGUOUS)) { + data_ = reinterpret_cast<const uint8_t*>(py_buf_.buf); + DCHECK(data_ != nullptr); + size_ = py_buf_.len; + capacity_ = py_buf_.len; + is_mutable_ = !py_buf_.readonly; + } else { + // Cannot propagate error directly, caller should check the data_ + // field for null + PyErr_Clear(); } } PyBuffer::~PyBuffer() { - PyAcquireGIL lock; - Py_XDECREF(obj_); + if (data_ != nullptr) { + PyAcquireGIL lock; + PyBuffer_Release(&py_buf_); + } } Status CheckPyError(StatusCode code) { diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h index b1e0888af..59d2c4808 100644 --- a/cpp/src/arrow/python/common.h +++ b/cpp/src/arrow/python/common.h @@ -142,13 +142,13 @@ class ARROW_EXPORT PyBuffer : public Buffer { public: /// Note that the GIL must be held when calling the PyBuffer constructor. /// - /// While memoryview objects support multi-demensional buffers, PyBuffer only supports + /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports /// one-dimensional byte buffers. explicit PyBuffer(PyObject* obj); ~PyBuffer(); private: - PyObject* obj_; + Py_buffer py_buf_; }; } // namespace py diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index bd508cf57..2d9c7f413 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -802,12 +802,12 @@ def frombuffer(object obj): Construct an Arrow buffer from a Python bytes object """ cdef shared_ptr[CBuffer] buf - try: - memoryview(obj) - buf.reset(new PyBuffer(obj)) - return pyarrow_wrap_buffer(buf) - except TypeError: + buf.reset(new PyBuffer(obj)) + if buf.get().data() == NULL: + # XXX should be TypeError? Can we find a way to propagate the + # original error instead? raise ValueError('Must pass object that implements buffer protocol') + return pyarrow_wrap_buffer(buf) cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader): diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index da26b101d..fd3744c18 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -132,6 +132,7 @@ def test_buffer_bytes(): buf = pa.frombuffer(val) assert isinstance(buf, pa.Buffer) + assert not buf.is_mutable result = buf.to_pybytes() @@ -143,6 +144,7 @@ def test_buffer_memoryview(): buf = pa.frombuffer(val) assert isinstance(buf, pa.Buffer) + assert not buf.is_mutable result = memoryview(buf) @@ -154,13 +156,19 @@ def test_buffer_bytearray(): buf = pa.frombuffer(val) assert isinstance(buf, pa.Buffer) + assert buf.is_mutable result = bytearray(buf) assert result == val -def test_buffer_numpy(): +def test_buffer_invalid(): + with pytest.raises(ValueError, match="buffer protocol"): + pa.frombuffer(None) + + +def test_buffer_to_numpy(): # Make sure creating a numpy array from an arrow buffer works byte_array = bytearray(20) byte_array[0] = 42 @@ -170,6 +178,19 @@ def test_buffer_numpy(): assert array.base == buf +def test_buffer_from_numpy(): + # C-contiguous + arr = np.arange(12, dtype=np.int8).reshape((3, 4)) + buf = pa.frombuffer(arr) + assert buf.to_pybytes() == arr.tobytes() + # F-contiguous; note strides informations is lost + buf = pa.frombuffer(arr.T) + assert buf.to_pybytes() == arr.tobytes() + # Non-contiguous + with pytest.raises(ValueError, match="buffer protocol"): + buf = pa.frombuffer(arr.T[::2]) + + def test_allocate_buffer(): buf = pa.allocate_buffer(100) assert buf.size == 100 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [Python] pa.frombuffer(bytearray) returns immutable Buffer > ---------------------------------------------------------- > > Key: ARROW-2155 > URL: https://issues.apache.org/jira/browse/ARROW-2155 > Project: Apache Arrow > Issue Type: Improvement > Components: Python > Affects Versions: 0.8.0 > Reporter: Antoine Pitrou > Assignee: Antoine Pitrou > Priority: Minor > Labels: pull-request-available > Fix For: 0.9.0 > > > I'd expect it to return a mutable buffer: > {code:python} > >>> pa.frombuffer(bytearray(10)).is_mutable > False > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)