This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ee36b216 Add Scalar/Datum abstraction (#1047) (#4393)
9ee36b216 is described below

commit 9ee36b216c3f7dcbaae520f451194acd4f55b98e
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Jul 4 10:06:38 2023 +0100

    Add Scalar/Datum abstraction (#1047) (#4393)
    
    * Add Scalar/Datum abstraction (#1047)
    
    * Add dyn Array
---
 arrow-array/src/lib.rs     |   3 ++
 arrow-array/src/scalar.rs  | 116 +++++++++++++++++++++++++++++++++++++++++++++
 arrow-select/src/filter.rs |  10 ----
 3 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index 46de381c3..afb7ec5e6 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -192,6 +192,9 @@ pub use arithmetic::ArrowNativeTypeOp;
 mod numeric;
 pub use numeric::*;
 
+mod scalar;
+pub use scalar::*;
+
 pub mod builder;
 pub mod cast;
 mod delta;
diff --git a/arrow-array/src/scalar.rs b/arrow-array/src/scalar.rs
new file mode 100644
index 000000000..e54a999f9
--- /dev/null
+++ b/arrow-array/src/scalar.rs
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::Array;
+
+/// A possibly [`Scalar`] [`Array`]
+///
+/// This allows optimised binary kernels where one or more arguments are constant
+///
+/// ```
+/// # use arrow_array::*;
+/// # use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
+/// # use arrow_schema::ArrowError;
+/// #
+/// fn eq_impl<T: ArrowPrimitiveType>(
+///     a: &PrimitiveArray<T>,
+///     a_scalar: bool,
+///     b: &PrimitiveArray<T>,
+///     b_scalar: bool,
+/// ) -> BooleanArray {
+///     let (array, scalar) = match (a_scalar, b_scalar) {
+///         (true, true) | (false, false) => {
+///             let len = a.len().min(b.len());
+///             let nulls = NullBuffer::union(a.nulls(), b.nulls());
+///             let buffer = BooleanBuffer::collect_bool(len, |idx| a.value(idx) == b.value(idx));
+///             return BooleanArray::new(buffer, nulls);
+///         }
+///         (true, false) => (b, (a.null_count() == 0).then(|| a.value(0))),
+///         (false, true) => (a, (b.null_count() == 0).then(|| b.value(0))),
+///     };
+///     match scalar {
+///         Some(v) => {
+///             let len = array.len();
+///             let nulls = array.nulls().cloned();
+///             let buffer = BooleanBuffer::collect_bool(len, |idx| array.value(idx) == v);
+///             BooleanArray::new(buffer, nulls)
+///         }
+///         None => BooleanArray::new_null(array.len()),
+///     }
+/// }
+///
+/// pub fn eq(l: &dyn Datum, r: &dyn Datum) -> Result<BooleanArray, ArrowError> {
+///     let (l_array, l_scalar) = l.get();
+///     let (r_array, r_scalar) = r.get();
+///     downcast_primitive_array!(
+///         (l_array, r_array) => Ok(eq_impl(l_array, l_scalar, r_array, r_scalar)),
+///         (a, b) => Err(ArrowError::NotYetImplemented(format!("{a} == {b}"))),
+///     )
+/// }
+///
+/// // Comparison of two arrays
+/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+/// let b = Int32Array::from(vec![1, 2, 4, 7, 3]);
+/// let r = eq(&a, &b).unwrap();
+/// let values: Vec<_> = r.values().iter().collect();
+/// assert_eq!(values, &[true, true, false, false, false]);
+///
+/// // Comparison of an array and a scalar
+/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+/// let b = Int32Array::from(vec![1]);
+/// let r = eq(&a, &Scalar::new(&b)).unwrap();
+/// let values: Vec<_> = r.values().iter().collect();
+/// assert_eq!(values, &[true, false, false, false, false]);
+pub trait Datum {
+    /// Returns the value for this [`Datum`] and a boolean indicating if the value is scalar
+    fn get(&self) -> (&dyn Array, bool);
+}
+
+impl<T: Array> Datum for T {
+    fn get(&self) -> (&dyn Array, bool) {
+        (self, false)
+    }
+}
+
+impl Datum for dyn Array {
+    fn get(&self) -> (&dyn Array, bool) {
+        (self, false)
+    }
+}
+
+/// A wrapper around a single value [`Array`] indicating kernels should treat it as a scalar value
+///
+/// See [`Datum`] for more information
+pub struct Scalar<'a>(&'a dyn Array);
+
+impl<'a> Scalar<'a> {
+    /// Create a new [`Scalar`] from an [`Array`]
+    ///
+    /// # Panics
+    ///
+    /// Panics if `array.len() != 1`
+    pub fn new(array: &'a dyn Array) -> Self {
+        assert_eq!(array.len(), 1);
+        Self(array)
+    }
+}
+
+impl<'a> Datum for Scalar<'a> {
+    fn get(&self) -> (&dyn Array, bool) {
+        (self.0, true)
+    }
+}
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index c89491944..94afd2df3 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -321,16 +321,6 @@ fn filter_array(
         // actually filter
         _ => downcast_primitive_array! {
             values => Ok(Arc::new(filter_primitive(values, predicate))),
-            DataType::Decimal128(p, s) => {
-                let values = values.as_any().downcast_ref::<Decimal128Array>().unwrap();
-                let filtered = filter_primitive(values, predicate);
-                Ok(Arc::new(filtered.with_precision_and_scale(*p, *s).unwrap()))
-            }
-            DataType::Decimal256(p, s) => {
-                let values = values.as_any().downcast_ref::<Decimal256Array>().unwrap();
-                let filtered = filter_primitive(values, predicate);
-                Ok(Arc::new(filtered.with_precision_and_scale(*p, *s).unwrap()))
-            }
             DataType::Boolean => {
                 let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();
                 Ok(Arc::new(filter_boolean(values, predicate)))

Reply via email to