Re: [PR] feat(python): Add array creation/building from buffers [arrow-nanoarrow]

via GitHub Thu, 08 Feb 2024 13:29:52 -0800


paleolimbot commented on code in PR #378:
URL: https://github.com/apache/arrow-nanoarrow/pull/378#discussion_r1483611666



##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -176,6 +178,188 @@ cdef object alloc_c_array_shallow_copy(object base, const 
ArrowArray* c_array) n
     return array_capsule
 
 
+cdef void pycapsule_buffer_deleter(object stream_capsule) noexcept:
+    cdef ArrowBuffer* buffer = <ArrowBuffer*>PyCapsule_GetPointer(
+        stream_capsule, 'nanoarrow_buffer'
+    )
+
+    ArrowBufferReset(buffer)
+    ArrowFree(buffer)
+
+
+cdef object alloc_c_buffer(ArrowBuffer** c_buffer) noexcept:
+    c_buffer[0] = <ArrowBuffer*> ArrowMalloc(sizeof(ArrowBuffer))
+    ArrowBufferInit(c_buffer[0])
+    return PyCapsule_New(c_buffer[0], 'nanoarrow_buffer', 
&pycapsule_buffer_deleter)
+
+cdef void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, 
int64_t size) noexcept with gil:
+    cdef Py_buffer* buffer = <Py_buffer*>allocator.private_data
+    PyBuffer_Release(buffer)
+    ArrowFree(buffer)
+
+
+cdef ArrowBufferAllocator c_pybuffer_deallocator(Py_buffer* buffer):
+    # This should probably be changed in nanoarrow C; however, currently, the deallocator
+    # won't get called if buffer.buf is NULL.
+    if buffer.buf == NULL:
+        PyBuffer_Release(buffer)
+        return ArrowBufferAllocatorDefault()
+
+    cdef Py_buffer* allocator_private = 
<Py_buffer*>ArrowMalloc(sizeof(Py_buffer))
+    if allocator_private == NULL:
+        PyBuffer_Release(buffer)
+        raise MemoryError()
+
+    memcpy(allocator_private, buffer, sizeof(Py_buffer))
+    return 
ArrowBufferDeallocator(<ArrowBufferDeallocatorCallback>&c_deallocate_pybuffer, 
allocator_private)
+
+
+cdef c_arrow_type_from_format(format):
+    # PyBuffer_SizeFromFormat() was added in Python 3.9 (potentially faster)
+    item_size = calcsize(format)
+
+    # Don't allow non-native endian values
+    if sys_byteorder == "little" and (">" in format or "!" in format):
+        raise ValueError(f"Can't convert format '{format}' to Arrow type")
+    elif sys_byteorder == "big" and  "<" in format:
+        raise ValueError(f"Can't convert format '{format}' to Arrow type")
+
+    # Strip system endian specifiers
+    format = format.strip("=@")
+
+    if format == "c":
+        return 0, NANOARROW_TYPE_STRING

Review Comment:
   Whoops, it's a little different: you get the roundtrip if you call
`c_buffer(np.array(c_buffer(<that's a string>)))`, whereas
`np.array(list("abcdefg"))` seems to do something slightly different.
   
   ```
   import numpy as np
   from nanoarrow.c_lib import c_buffer
   np.array(c_buffer(b"abcdefg").set_format("c").data)
   #> array([b'a', b'b', b'c', b'd', b'e', b'f', b'g'], dtype='|S1')
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(python): Add array creation/building from buffers [arrow-nanoarrow]

Reply via email to