paleolimbot commented on code in PR #378:
URL: https://github.com/apache/arrow-nanoarrow/pull/378#discussion_r1484934590
##########
python/src/nanoarrow/c_lib.py:
##########
@@ -125,10 +138,205 @@ def c_array(obj=None, requested_schema=None) -> CArray:
out = CArray.allocate(CSchema.allocate())
obj._export_to_c(out._addr(), out.schema._addr())
return out
- else:
+
+ # Try buffer protocol (e.g., numpy arrays)
+ try:
+ return c_array_from_pybuffer(obj)
+ except Exception as e:
raise TypeError(
f"Can't convert object of type {type(obj).__name__} to nanoarrow.c_array"
+ ) from e
+
+
+def c_array_from_pybuffer(obj) -> CArray:
+ """Create an ArrowArray wrapper from the Python buffer protocol
+
+ Invokes the Python buffer protocol to wrap the buffer represented by obj
+ if possible.
+
+ Examples
+ --------
+
+ >>> import nanoarrow as na
+ >>> from nanoarrow.c_lib import c_array_from_pybuffer
+ >>> na.c_array_view(c_array_from_pybuffer(b"1234"))
+ <nanoarrow.c_lib.CArrayView>
+ - storage_type: 'uint8'
+ - length: 4
+ - offset: 0
+ - null_count: 0
+ - buffers[2]:
+ - validity <bool[0 b] >
+ - data <uint8[4 b] 49 50 51 52>
+ - dictionary: NULL
+ - children[0]:
+ """
+
+ buffer = CBuffer().set_pybuffer(obj)
+ view = buffer.data
+ type_id = view.data_type_id
+ element_size_bits = view.element_size_bits
+
+ builder = CArrayBuilder.allocate()
+
+ # Fixed-size binary needs a schema
+ if type_id == CArrowType.BINARY and element_size_bits != 0:
+ c_schema = (
+ CSchemaBuilder.allocate()
+ .set_type_fixed_size(CArrowType.FIXED_SIZE_BINARY, element_size_bits // 8)
+ .finish()
)
+ builder.init_from_schema(c_schema)
+ elif type_id == CArrowType.STRING:
+ builder.init_from_type(int(CArrowType.INT8))
+ elif type_id == CArrowType.BINARY:
+ builder.init_from_type(int(CArrowType.UINT8))
+ else:
+ builder.init_from_type(int(type_id))
+
+ # Set the length
+ builder.set_length(len(view))
+
+ # Move ownership of the ArrowBuffer wrapped by buffer to builder.buffer(1)
+ builder.set_buffer(1, buffer)
+
+ # No nulls or offset from a PyBuffer
+ builder.set_null_count(0)
+ builder.set_offset(0)
+
+ return builder.finish()
+
+
+def c_array_empty(schema) -> CArray:
Review Comment:
I special-cased empty iterables so that you can do `c_array([], schema)`!
I'm a tiny bit worried that somebody would have something like:
```python
def some_method(array, schema):
array = c_array(array)
schema = c_schema(schema)
```
...and if somebody mixes up the argument order, they might silently get `array`
as an empty array and `schema` as the array's schema.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]