paleolimbot commented on code in PR #378:
URL: https://github.com/apache/arrow-nanoarrow/pull/378#discussion_r1483604522


##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -176,6 +178,188 @@ cdef object alloc_c_array_shallow_copy(object base, const 
ArrowArray* c_array) n
     return array_capsule
 
 
+cdef void pycapsule_buffer_deleter(object stream_capsule) noexcept:
+    cdef ArrowBuffer* buffer = <ArrowBuffer*>PyCapsule_GetPointer(
+        stream_capsule, 'nanoarrow_buffer'
+    )
+
+    ArrowBufferReset(buffer)
+    ArrowFree(buffer)
+
+
+cdef object alloc_c_buffer(ArrowBuffer** c_buffer) noexcept:
+    c_buffer[0] = <ArrowBuffer*> ArrowMalloc(sizeof(ArrowBuffer))
+    ArrowBufferInit(c_buffer[0])
+    return PyCapsule_New(c_buffer[0], 'nanoarrow_buffer', 
&pycapsule_buffer_deleter)
+
+cdef void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, 
int64_t size) noexcept with gil:
+    cdef Py_buffer* buffer = <Py_buffer*>allocator.private_data
+    PyBuffer_Release(buffer)
+    ArrowFree(buffer)
+
+
+cdef ArrowBufferAllocator c_pybuffer_deallocator(Py_buffer* buffer):
+    # This should probably be changed in nanoarrow C; however, currently, the 
deallocator
+    # won't get called if buffer.buf is NULL.
+    if buffer.buf == NULL:
+        PyBuffer_Release(buffer)
+        return ArrowBufferAllocatorDefault()
+
+    cdef Py_buffer* allocator_private = 
<Py_buffer*>ArrowMalloc(sizeof(Py_buffer))
+    if allocator_private == NULL:
+        PyBuffer_Release(buffer)
+        raise MemoryError()
+
+    memcpy(allocator_private, buffer, sizeof(Py_buffer))
+    return 
ArrowBufferDeallocator(<ArrowBufferDeallocatorCallback>&c_deallocate_pybuffer, 
allocator_private)
+
+
+cdef c_arrow_type_from_format(format):
+    # PyBuffer_SizeFromFormat() was added in Python 3.9 (potentially faster)
+    item_size = calcsize(format)
+
+    # Don't allow non-native endian values
+    if sys_byteorder == "little" and (">" in format or "!" in format):
+        raise ValueError(f"Can't convert format '{format}' to Arrow type")
+    elif sys_byteorder == "big" and  "<" in format:
+        raise ValueError(f"Can't convert format '{format}' to Arrow type")
+
+    # Strip system endian specifiers
+    format = format.strip("=@")
+
+    if format == "c":
+        return 0, NANOARROW_TYPE_STRING
+    elif format == "e":
+        return item_size, NANOARROW_TYPE_HALF_FLOAT
+    elif format == "f":
+        return item_size, NANOARROW_TYPE_FLOAT
+    elif format == "d":
+        return item_size, NANOARROW_TYPE_DOUBLE
+
+    # Check for signed integers
+    if format in ("b", "?", "h", "i", "l", "q", "n"):
+        if item_size == 1:
+            return item_size, NANOARROW_TYPE_INT8
+        elif item_size == 2:
+            return item_size, NANOARROW_TYPE_INT16
+        elif item_size == 4:
+            return item_size, NANOARROW_TYPE_INT32
+        elif item_size == 8:
+            return item_size, NANOARROW_TYPE_INT64
+
+    # Check for unsigned integers
+    if format in ("B", "H", "I", "L", "Q", "N"):
+        if item_size == 1:
+            return item_size, NANOARROW_TYPE_UINT8
+        elif item_size == 2:
+            return item_size, NANOARROW_TYPE_UINT16
+        elif item_size == 4:
+            return item_size, NANOARROW_TYPE_UINT32
+        elif item_size == 8:
+            return item_size, NANOARROW_TYPE_UINT64
+
+    # If all else fails, return opaque fixed-size binary
+    return item_size, NANOARROW_TYPE_BINARY
+
+
+cdef int c_format_from_arrow_type(ArrowType type_id, int element_size_bits, 
size_t out_size, char* out):
+    if type_id in (NANOARROW_TYPE_BINARY, NANOARROW_TYPE_FIXED_SIZE_BINARY) 
and element_size_bits > 0:
+        snprintf(out, out_size, "%ds", <int>(element_size_bits // 8))
+        return element_size_bits
+
+    cdef const char* format_const = ""
+    cdef int element_size_bits_calc = 0
+    if type_id == NANOARROW_TYPE_STRING:
+        format_const = "c"
+        element_size_bits_calc = 0
+    elif type_id == NANOARROW_TYPE_BINARY:
+        format_const = "B"
+        element_size_bits_calc = 0
+    elif type_id == NANOARROW_TYPE_BOOL:
+        # Bitmaps export as unspecified binary
+        format_const = "B"
+        element_size_bits_calc = 1
+    elif type_id == NANOARROW_TYPE_INT8:
+        format_const = "b"
+        element_size_bits_calc = 8
+    elif type_id == NANOARROW_TYPE_UINT8:
+        format_const = "B"
+        element_size_bits_calc = 8
+    elif type_id == NANOARROW_TYPE_INT16:
+        format_const = "h"
+        element_size_bits_calc = 16
+    elif type_id == NANOARROW_TYPE_UINT16:
+        format_const = "H"
+        element_size_bits_calc = 16
+    elif type_id in (NANOARROW_TYPE_INT32, NANOARROW_TYPE_INTERVAL_MONTHS):
+        format_const = "i"
+        element_size_bits_calc = 32
+    elif type_id == NANOARROW_TYPE_UINT32:
+        format_const = "I"
+        element_size_bits_calc = 32
+    elif type_id == NANOARROW_TYPE_INT64:
+        format_const = "q"
+        element_size_bits_calc = 64
+    elif type_id == NANOARROW_TYPE_UINT64:
+        format_const = "Q"
+        element_size_bits_calc = 64
+    elif type_id == NANOARROW_TYPE_HALF_FLOAT:
+        format_const = "e"
+        element_size_bits_calc = 16
+    elif type_id == NANOARROW_TYPE_FLOAT:
+        format_const = "f"
+        element_size_bits_calc = 32
+    elif type_id == NANOARROW_TYPE_DOUBLE:
+        format_const = "d"
+        element_size_bits_calc = 64
+    elif type_id == NANOARROW_TYPE_INTERVAL_DAY_TIME:
+        format_const = "ii"
+        element_size_bits_calc = 64
+    elif type_id == NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+        format_const = "iiq"
+        element_size_bits_calc = 128
+    elif type_id == NANOARROW_TYPE_DECIMAL128:
+        format_const = "16s"
+        element_size_bits_calc = 128
+    elif type_id == NANOARROW_TYPE_DECIMAL256:
+        format_const = "32s"
+        element_size_bits_calc = 256
+    else:
+        raise ValueError(f"Unsupported Arrow type_id for format conversion: 
{type_id}")
+
+    snprintf(out, out_size, "%s", format_const)
+    return element_size_bits_calc
+
+
+cdef object c_buffer_set_pybuffer(object obj, ArrowBuffer** c_buffer):
+    ArrowBufferReset(c_buffer[0])
+
+    cdef Py_buffer buffer
+    cdef int rc = PyObject_GetBuffer(obj, &buffer, PyBUF_FORMAT | 
PyBUF_ANY_CONTIGUOUS)
+    if rc != 0:
+        raise BufferError()
+
+    # Parse the buffer's format string to get the ArrowType and element size
+    try:
+        if buffer.format == NULL:
+            format = "B"
+        else:
+            format = buffer.format.decode("UTF-8")
+    except Exception as e:
+        PyBuffer_Release(&buffer)
+        raise e
+
+    # Transfers ownership of buffer to c_buffer, whose finalizer will be 
called by
+    # the capsule when the capsule is deleted or garbage collected
+    c_buffer[0].data = <uint8_t*>buffer.buf
+    c_buffer[0].size_bytes = <int64_t>buffer.len
+    c_buffer[0].capacity_bytes = 0
+    c_buffer[0].allocator = c_pybuffer_deallocator(&buffer)

Review Comment:
   `PyObject_GetBuffer()` is sufficient (i.e., if you forget to call
   `PyBuffer_Release()`, you will leak). I don't think there is a way to
   increment the reference (a `Py_buffer` is not a `PyObject`), and the buffer
   protocol documentation seemed to indicate that one should not increment or
   decrement the reference count of `buffer.obj`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to