This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8aea70c  ARROW-3540: [Rust] Incorporate BooleanArray into 
PrimitiveArray
8aea70c is described below

commit 8aea70c6932206e685c7508e660d57892f9f44a9
Author: Paddy Horan <[email protected]>
AuthorDate: Wed Oct 17 16:40:30 2018 +0200

    ARROW-3540: [Rust] Incorporate BooleanArray into PrimitiveArray
    
    Currently we have a specific implementation for `BooleanArray` 
(bit-packing), but due to the `ArrowPrimitiveType` trait which we use as a 
trait bound in many places `PrimitiveArray<bool>` is still a valid type.  
`make_array` actually uses `PrimitiveArray<bool>` which may be a bug but would 
be fixed by this PR anyway.
    
    This PR moves the implementation of `BooleanArray` into 
`PrimitiveArray<bool>`, this would allow us to use the `ArrayPrimitiveType` 
trait as a bound more consistently.  i.e. `PrimitiveArrayBuilder<T>` could 
return `PrimitiveArray<T>` instead of having a separate `BooleanArrayBuilder`.
    
    cc @kszucs @sunchao @andygrove @crepererum
    
    Author: Paddy Horan <[email protected]>
    
    Closes #2778 from paddyhoran/bool_array_updates and squashes the following 
commits:
    
    467e65f2 <Paddy Horan> Incorporated `BooleanArray` into `PrimitiveArray`
---
 rust/src/array.rs | 103 ++++++++++++++++++++++++------------------------------
 1 file changed, 45 insertions(+), 58 deletions(-)

diff --git a/rust/src/array.rs b/rust/src/array.rs
index 2da7104..9eca9eb 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -122,10 +122,14 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     data: ArrayDataRef,
     /// Pointer to the value array. The lifetime of this must be <= to the 
value buffer
     /// stored in `data`, so it's safe to store.
+    /// Also note that boolean arrays are bit-packed, so although the 
underlying pointer is of type
+    /// bool it should be cast back to u8 before being used.
+    /// i.e. `self.raw_values.get() as *const u8`
     raw_values: RawPtrBox<T>,
 }
 
 /// Macro to define primitive arrays for different data types and native types.
+/// Boolean arrays are bit-packed and so are not defined by this macro
 macro_rules! def_primitive_array {
     ($data_ty:path, $native_ty:ident) => {
         impl PrimitiveArray<$native_ty> {
@@ -234,27 +238,27 @@ macro_rules! def_primitive_array {
                 PrimitiveArray::from(array_data)
             }
         }
-    };
-}
 
-/// Constructs a `PrimitiveArray` from an array data reference.
-impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
-    fn from(data: ArrayDataRef) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "PrimitiveArray data should contain a single buffer only (values 
buffer)"
-        );
-        let raw_values = data.buffers()[0].raw_data();
-        assert!(
-            memory::is_aligned::<u8>(raw_values, mem::align_of::<T>()),
-            "memory is not aligned"
-        );
-        Self {
-            data,
-            raw_values: RawPtrBox::new(raw_values as *const T),
+        /// Constructs a `PrimitiveArray` from an array data reference.
+        impl From<ArrayDataRef> for PrimitiveArray<$native_ty> {
+            fn from(data: ArrayDataRef) -> Self {
+                assert_eq!(
+                    data.buffers().len(),
+                    1,
+                    "PrimitiveArray data should contain a single buffer only 
(values buffer)"
+                );
+                let raw_values = data.buffers()[0].raw_data();
+                assert!(
+                    memory::is_aligned::<u8>(raw_values, 
mem::align_of::<$native_ty>()),
+                    "memory is not aligned"
+                );
+                Self {
+                    data,
+                    raw_values: RawPtrBox::new(raw_values as *const 
$native_ty),
+                }
+            }
         }
-    }
+    };
 }
 
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
@@ -282,13 +286,8 @@ def_primitive_array!(DataType::Int64, i64);
 def_primitive_array!(DataType::Float32, f32);
 def_primitive_array!(DataType::Float64, f64);
 
-/// Array whose elements are of boolean types.
-pub struct BooleanArray {
-    data: ArrayDataRef,
-    raw_values: RawPtrBox<u8>,
-}
-
-impl BooleanArray {
+/// Specific implementation for Boolean arrays due to bit-packing
+impl PrimitiveArray<bool> {
     pub fn new(length: i64, values: Buffer, null_count: i64, offset: i64) -> 
Self {
         let array_data = ArrayData::builder(DataType::Boolean)
             .len(length)
@@ -296,7 +295,7 @@ impl BooleanArray {
             .null_count(null_count)
             .offset(offset)
             .build();
-        BooleanArray::from(array_data)
+        PrimitiveArray::from(array_data)
     }
 
     /// Returns a `Buffer` holds all the values of this array.
@@ -307,15 +306,15 @@ impl BooleanArray {
     }
 
     /// Returns the boolean value at index `i`.
+    ///
+    /// Note this doesn't do any bound checking, for performance reason.
     pub fn value(&self, i: i64) -> bool {
-        let offset = i + self.offset();
-        assert!(offset < self.data.len() as i64);
-        unsafe { bit_util::get_bit_raw(self.raw_values.get(), offset) }
+        unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, i + 
self.offset()) }
     }
 }
 
 /// Constructs a boolean array from a vector. Should only be used for testing.
-impl From<Vec<bool>> for BooleanArray {
+impl From<Vec<bool>> for PrimitiveArray<bool> {
     fn from(data: Vec<bool>) -> Self {
         let num_byte = bit_util::ceil(data.len() as i64, 8) as usize;
         let mut mut_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, 
false);
@@ -331,11 +330,11 @@ impl From<Vec<bool>> for BooleanArray {
             .len(data.len() as i64)
             .add_buffer(mut_buf.freeze())
             .build();
-        BooleanArray::from(array_data)
+        PrimitiveArray::from(array_data)
     }
 }
 
-impl From<Vec<Option<bool>>> for BooleanArray {
+impl From<Vec<Option<bool>>> for PrimitiveArray<bool> {
     fn from(data: Vec<Option<bool>>) -> Self {
         let data_len = data.len() as i64;
         let num_byte = bit_util::ceil(data_len, 8) as usize;
@@ -361,17 +360,17 @@ impl From<Vec<Option<bool>>> for BooleanArray {
             .add_buffer(val_buf.freeze())
             .null_bit_buffer(null_buf.freeze())
             .build();
-        BooleanArray::from(array_data)
+        PrimitiveArray::from(array_data)
     }
 }
 
-/// Constructs a `BooleanArray` from an array data reference.
-impl From<ArrayDataRef> for BooleanArray {
+/// Constructs a `PrimitiveArray<bool>` from an array data reference.
+impl From<ArrayDataRef> for PrimitiveArray<bool> {
     fn from(data: ArrayDataRef) -> Self {
         assert_eq!(
             data.buffers().len(),
             1,
-            "BooleanArray data should contain a single buffer only (values 
buffer)"
+            "PrimitiveArray data should contain a single buffer only (values 
buffer)"
         );
         let raw_values = data.buffers()[0].raw_data();
         assert!(
@@ -380,24 +379,12 @@ impl From<ArrayDataRef> for BooleanArray {
         );
         Self {
             data,
-            raw_values: RawPtrBox::new(raw_values),
+            raw_values: RawPtrBox::new(raw_values as *const bool),
         }
     }
 }
 
-impl Array for BooleanArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> ArrayDataRef {
-        self.data.clone()
-    }
-
-    fn data_ref(&self) -> &ArrayDataRef {
-        &self.data
-    }
-}
+pub type BooleanArray = PrimitiveArray<bool>;
 
 /// A list array where each element is a variable-sized sequence of values 
with the same
 /// type.
@@ -745,7 +732,7 @@ mod tests {
         // 00000010 01001000
         let buf = Buffer::from([72_u8, 2_u8]);
         let buf2 = buf.clone();
-        let arr = BooleanArray::new(10, buf, 0, 0);
+        let arr = PrimitiveArray::<bool>::new(10, buf, 0, 0);
         assert_eq!(buf2, arr.values());
         assert_eq!(10, arr.len());
         assert_eq!(0, arr.offset());
@@ -760,7 +747,7 @@ mod tests {
     #[test]
     fn test_boolean_array_from_vec() {
         let buf = Buffer::from([10_u8]);
-        let arr = BooleanArray::from(vec![false, true, false, true]);
+        let arr = PrimitiveArray::<bool>::from(vec![false, true, false, true]);
         assert_eq!(buf, arr.values());
         assert_eq!(4, arr.len());
         assert_eq!(0, arr.offset());
@@ -775,7 +762,7 @@ mod tests {
     #[test]
     fn test_boolean_array_from_vec_option() {
         let buf = Buffer::from([10_u8]);
-        let arr = BooleanArray::from(vec![Some(false), Some(true), None, 
Some(true)]);
+        let arr = PrimitiveArray::<bool>::from(vec![Some(false), Some(true), 
None, Some(true)]);
         assert_eq!(buf, arr.values());
         assert_eq!(4, arr.len());
         assert_eq!(0, arr.offset());
@@ -803,7 +790,7 @@ mod tests {
             .offset(2)
             .add_buffer(buf)
             .build();
-        let arr = BooleanArray::from(data);
+        let arr = PrimitiveArray::<bool>::from(data);
         assert_eq!(buf2, arr.values());
         assert_eq!(5, arr.len());
         assert_eq!(2, arr.offset());
@@ -815,11 +802,11 @@ mod tests {
 
     #[test]
     #[should_panic(
-        expected = "BooleanArray data should contain a single buffer only 
(values buffer)"
+        expected = "PrimitiveArray data should contain a single buffer only 
(values buffer)"
     )]
     fn test_boolean_array_invalid_buffer_len() {
         let data = ArrayData::builder(DataType::Boolean).len(5).build();
-        BooleanArray::from(data);
+        PrimitiveArray::<bool>::from(data);
     }
 
     #[test]
@@ -1037,7 +1024,7 @@ mod tests {
         let struct_array = StructArray::from(vec![
             (
                 Field::new("b", DataType::Boolean, false),
-                Arc::new(BooleanArray::from(vec![false, false, true, true])) 
as Arc<Array>,
+                Arc::new(PrimitiveArray::from(vec![false, false, true, true])) 
as Arc<Array>,
             ),
             (
                 Field::new("c", DataType::Int32, false),

Reply via email to