joellubi commented on code in PR #43488:
URL: https://github.com/apache/arrow/pull/43488#discussion_r1706066173


##########
python/pyarrow/array.pxi:
##########
@@ -4447,6 +4447,69 @@ cdef class FixedShapeTensorArray(ExtensionArray):
             FixedSizeListArray.from_arrays(values, shape[1:].prod())
         )
 
+cdef class Bool8Array(ExtensionArray):
+    """
+    Concrete class for bool8 extension arrays.
+    Examples
+    --------
+    Define the extension type for an bool8 array
+    >>> import pyarrow as pa
+    >>> bool8_type = pa.bool8()
+    Create an extension array
+    >>> arr = [-1, 0, 1, 2, None]
+    >>> storage = pa.array(arr, pa.int8())
+    >>> pa.ExtensionArray.from_storage(bool8_type, storage)
+    <pyarrow.lib.Bool8Array object at ...>
+    [
+      -1,
+      0,
+      1,
+      2,
+      null
+    ]
+    """
+
+    def to_numpy(self, zero_copy_only=True, writable=False):
+        try:
+            return self.storage.to_numpy().view(np.bool_)
+        except ArrowInvalid as e:
+            if zero_copy_only:
+                raise e
+
+        return _pc().not_equal(self.storage, 
0).to_numpy(zero_copy_only=zero_copy_only, writable=writable)
+
+    @staticmethod
+    def from_numpy(obj):
+        """
+        Convert numpy array to a bool8 extension array without making a copy.
+        The input array must be 1-dimensional, with either bool_ or int8 dtype.
+
+        Parameters
+        ----------
+        obj : numpy.ndarray
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> arr = np.array([True, False, True], dtype=np.bool_)
+        >>> pa.Bool8Array.from_numpy(arr)
+        <pyarrow.lib.Bool8Array object at ...>
+        [
+          1,
+          0,
+          1
+        ]
+        """
+
+        if obj.ndim != 1:
+            raise ValueError(f"Cannot convert {obj.ndim}-D array to bool8 
array")
+        
+        if obj.dtype not in [np.bool_, np.int8]:
+            raise TypeError(f"Array dtype {obj.dtype} incompatible with bool8 
storage")
+
+        buf = foreign_buffer(obj.ctypes.data, obj.size)
+        return Array.from_buffers(bool8(), obj.size, [None, buf])

Review Comment:
   I gave this a try and it works if the numpy array has `dtype=np.int8`:
   ```python
   np_arr = np.array([1, 0, 1], dtype=np.int8)
   pa_storage_arr = pa.array(np_arr, type=pa.int8())
   pa_bool8_arr = pa.ExtensionArray.from_storage(pa.bool8(), pa_storage_arr)
   ```
   
   This does not produce any copies. The existing approach of using 
`foreign_buffer` also works with `np_arr = np.array([True, False, True], 
dtype=np.bool_)` without making a copy.
   
   However, using the `pa.array()` constructor currently does make a copy when 
going bool -> int8. I think this would require a zero-copy casting kernel to be 
added to C++. That seems like it would be a better approach; I just have to 
wrap my head around that part of the code.
   
   CC: @felipecrv does this sound right ^?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to