This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5d8b39273 feat: Support bitwise and boolean aggregate functions (#4210)
5d8b39273 is described below
commit 5d8b39273f96e643739991dbc46e4fb71b462156
Author: Igor Izvekov <[email protected]>
AuthorDate: Fri May 19 11:24:44 2023 +0300
feat: Support bitwise and boolean aggregate functions (#4210)
* feat: Support bitwise and boolean aggregate functions
* fix: clippy
* feat: macro_rules for bit operations
* fix: SIMD and docs
---
arrow-arith/src/aggregate.rs | 201 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 201 insertions(+)
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 2833300dd..4961d7efc 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -24,6 +24,7 @@ use arrow_buffer::ArrowNativeType;
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_schema::ArrowError;
use arrow_schema::*;
+use std::ops::{BitAnd, BitOr, BitXor};
/// Generic test for NaN, the optimizer should be able to remove this for
integer types.
#[inline]
@@ -326,6 +327,115 @@ where
}
}
+macro_rules! bit_operation {
+ ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
+ #[doc = $DOC]
+ ///
+ /// Returns `None` if the array is empty or only contains null values.
+ pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
+ where
+ T: ArrowNumericType,
+ T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
+ {
+ let default;
+ if $DEFAULT == -1 {
+ default = T::Native::ONE.neg_wrapping();
+ } else {
+ default = T::default_value();
+ }
+
+ let null_count = array.null_count();
+
+ if null_count == array.len() {
+ return None;
+ }
+
+ let data: &[T::Native] = array.values();
+
+ match array.nulls() {
+ None => {
+ let result = data
+ .iter()
+ .fold(default, |accumulator, value|
accumulator.$OP(*value));
+
+ Some(result)
+ }
+ Some(nulls) => {
+ let mut result = default;
+ let data_chunks = data.chunks_exact(64);
+ let remainder = data_chunks.remainder();
+
+ let bit_chunks = nulls.inner().bit_chunks();
+ data_chunks
+ .zip(bit_chunks.iter())
+ .for_each(|(chunk, mask)| {
+ // index_mask has value 1 << i in the loop
+ let mut index_mask = 1;
+ chunk.iter().for_each(|value| {
+ if (mask & index_mask) != 0 {
+ result = result.$OP(*value);
+ }
+ index_mask <<= 1;
+ });
+ });
+
+ let remainder_bits = bit_chunks.remainder_bits();
+
+ remainder.iter().enumerate().for_each(|(i, value)| {
+ if remainder_bits & (1 << i) != 0 {
+ result = result.$OP(*value);
+ }
+ });
+
+ Some(result)
+ }
+ }
+ }
+ };
+}
+
+bit_operation!(
+ bit_and,
+ bitand,
+ BitAnd,
+ -1,
+ "Returns the bitwise and of all non-null input values."
+);
+bit_operation!(
+ bit_or,
+ bitor,
+ BitOr,
+ 0,
+ "Returns the bitwise or of all non-null input values."
+);
+bit_operation!(
+ bit_xor,
+ bitxor,
+ BitXor,
+ 0,
+ "Returns the bitwise xor of all non-null input values."
+);
+
+/// Returns true if all non-null input values are true, otherwise false.
+///
+/// Returns `None` if the array is empty or only contains null values.
+pub fn bool_and(array: &BooleanArray) -> Option<bool> {
+ if array.null_count() == array.len() {
+ return None;
+ }
+ Some(array.false_count() == 0)
+}
+
+/// Returns true if any non-null input value is true, otherwise false.
+///
+/// Returns `None` if the array is empty or only contains null values.
+pub fn bool_or(array: &BooleanArray) -> Option<bool> {
+ if array.null_count() == array.len() {
+ return None;
+ }
+ Some(array.true_count() != 0)
+}
+
/// Returns the sum of values in the primitive array.
///
/// Returns `Ok(None)` if the array is empty or only contains null values.
@@ -838,6 +948,97 @@ mod tests {
assert_eq!(Some((1..=100).filter(|i| i % 33 == 0).sum()), sum(&c));
}
+ #[test]
+ fn test_primitive_array_bit_and() {
+ let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ assert_eq!(0, bit_and(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_and_with_nulls() {
+ let a = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
+ assert_eq!(2, bit_and(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_and_all_nulls() {
+ let a = Int32Array::from(vec![None, None, None]);
+ assert_eq!(None, bit_and(&a));
+ }
+
+ #[test]
+ fn test_primitive_array_bit_or() {
+ let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ assert_eq!(7, bit_or(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_or_with_nulls() {
+ let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
+ assert_eq!(7, bit_or(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_or_all_nulls() {
+ let a = Int32Array::from(vec![None, None, None]);
+ assert_eq!(None, bit_or(&a));
+ }
+
+ #[test]
+ fn test_primitive_array_bit_xor() {
+ let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ assert_eq!(1, bit_xor(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_xor_with_nulls() {
+ let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
+ assert_eq!(4, bit_xor(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bit_xor_all_nulls() {
+ let a = Int32Array::from(vec![None, None, None]);
+ assert_eq!(None, bit_xor(&a));
+ }
+
+ #[test]
+ fn test_primitive_array_bool_and() {
+ let a = BooleanArray::from(vec![true, false, true, false, true]);
+ assert!(!bool_and(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bool_and_with_nulls() {
+ let a = BooleanArray::from(vec![None, Some(true), Some(true), None,
Some(true)]);
+ assert!(bool_and(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bool_and_all_nulls() {
+ let a = BooleanArray::from(vec![None, None, None]);
+ assert_eq!(None, bool_and(&a));
+ }
+
+ #[test]
+ fn test_primitive_array_bool_or() {
+ let a = BooleanArray::from(vec![true, false, true, false, true]);
+ assert!(bool_or(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bool_or_with_nulls() {
+ let a =
+ BooleanArray::from(vec![None, Some(false), Some(false), None,
Some(false)]);
+ assert!(!bool_or(&a).unwrap());
+ }
+
+ #[test]
+ fn test_primitive_array_bool_or_all_nulls() {
+ let a = BooleanArray::from(vec![None, None, None]);
+ assert_eq!(None, bool_or(&a));
+ }
+
#[test]
fn test_primitive_array_min_max() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);