[arrow] branch master updated: ARROW-4196: [Rust] Add explicit SIMD vectorization for arithmetic ops in "array_ops"

agrove Mon, 18 Feb 2019 10:16:23 -0800

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new a65798a  ARROW-4196: [Rust] Add explicit SIMD vectorization for 
arithmetic ops in "array_ops"
a65798a is described below

commit a65798a0ed3a96cfcd765e84615321da25ae17c8
Author: Paddy Horan <[email protected]>
AuthorDate: Mon Feb 18 11:15:30 2019 -0700

    ARROW-4196: [Rust] Add explicit SIMD vectorization for arithmetic ops in 
"array_ops"
    
    Notes:
    
    I moved the `divide` kernel but did not update it.  I will address this as 
part of 
[ARROW-4590](https://issues.apache.org/jira/browse/ARROW-4590?filter=-1) as I 
need to check for zero.  Up to this point I could ignore this and read/write to 
the padded region.
    
    I tried to define an `ArrowSIMDType` trait, but I felt it was worse than
conditionally compiling the SIMD methods into `ArrowNumericType`.
    
    Author: Paddy Horan <[email protected]>
    
    Closes #3680 from paddyhoran/arithmetic-kernels and squashes the following 
commits:
    
    d447bda <Paddy Horan> Fixed code comment lints
    5e0bbd1 <Paddy Horan> Added benchmark for `arithmetic_kernels`
    0f90968 <Paddy Horan> Added `arithmetic_kernels`
    577d9c2 <Paddy Horan> Add SIMD associated type to `ArrowNumericType`
---
 rust/arrow/Cargo.toml                        |   4 +
 rust/arrow/benches/arithmetic_kernels.rs     |  80 +++++++
 rust/arrow/src/compute/arithmetic_kernels.rs | 311 +++++++++++++++++++++++++++
 rust/arrow/src/compute/array_ops.rs          | 203 +----------------
 rust/arrow/src/compute/boolean_kernels.rs    |   6 +-
 rust/arrow/src/compute/mod.rs                |   2 +
 rust/arrow/src/datatypes.rs                  |  91 ++++++--
 rust/arrow/src/util/bit_util.rs              |   6 +-
 rust/datafusion/examples/csv_sql.rs          |   3 +-
 rust/datafusion/src/dfparser.rs              |   4 +-
 rust/datafusion/src/execution/aggregate.rs   |  21 +-
 rust/datafusion/src/execution/expression.rs  |  13 +-
 rust/datafusion/src/execution/filter.rs      |   3 +-
 rust/datafusion/src/lib.rs                   |   4 +-
 rust/datafusion/src/logicalplan.rs           |   4 +-
 rust/datafusion/src/sqlplanner.rs            |   6 +-
 16 files changed, 517 insertions(+), 244 deletions(-)

diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 04e8ac0..ae09ee8 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -62,3 +62,7 @@ harness = false
 [[bench]]
 name = "boolean_kernels"
 harness = false
+
+[[bench]]
+name = "arithmetic_kernels"
+harness = false
diff --git a/rust/arrow/benches/arithmetic_kernels.rs 
b/rust/arrow/benches/arithmetic_kernels.rs
new file mode 100644
index 0000000..2271bd5
--- /dev/null
+++ b/rust/arrow/benches/arithmetic_kernels.rs
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[macro_use]
+extern crate criterion;
+use criterion::Criterion;
+
+extern crate arrow;
+
+use arrow::array::*;
+use arrow::builder::*;
+use arrow::compute::arithmetic_kernels::*;
+use arrow::error::Result;
+
+fn create_array(size: usize) -> Float32Array {
+    let mut builder = Float32Builder::new(size);
+    for _i in 0..size {
+        builder.append_value(1.0).unwrap();
+    }
+    builder.finish()
+}
+
+fn bin_op_no_simd<F>(size: usize, op: F)
+where
+    F: Fn(f32, f32) -> Result<f32>,
+{
+    let arr_a = create_array(size);
+    let arr_b = create_array(size);
+    criterion::black_box(math_op(&arr_a, &arr_b, op).unwrap());
+}
+
+fn add_simd(size: usize) {
+    let arr_a = create_array(size);
+    let arr_b = create_array(size);
+    criterion::black_box(add(&arr_a, &arr_b).unwrap());
+}
+
+fn subtract_simd(size: usize) {
+    let arr_a = create_array(size);
+    let arr_b = create_array(size);
+    criterion::black_box(subtract(&arr_a, &arr_b).unwrap());
+}
+
+fn multiply_simd(size: usize) {
+    let arr_a = create_array(size);
+    let arr_b = create_array(size);
+    criterion::black_box(multiply(&arr_a, &arr_b).unwrap());
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    c.bench_function("add 512", |b| {
+        b.iter(|| bin_op_no_simd(512, |a, b| Ok(a + b)))
+    });
+    c.bench_function("add 512 simd", |b| b.iter(|| add_simd(512)));
+    c.bench_function("subtract 512", |b| {
+        b.iter(|| bin_op_no_simd(512, |a, b| Ok(a - b)))
+    });
+    c.bench_function("subtract 512 simd", |b| b.iter(|| subtract_simd(512)));
+    c.bench_function("multiply 512", |b| {
+        b.iter(|| bin_op_no_simd(512, |a, b| Ok(a * b)))
+    });
+    c.bench_function("multiply 512 simd", |b| b.iter(|| multiply_simd(512)));
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/rust/arrow/src/compute/arithmetic_kernels.rs 
b/rust/arrow/src/compute/arithmetic_kernels.rs
new file mode 100644
index 0000000..2566002
--- /dev/null
+++ b/rust/arrow/src/compute/arithmetic_kernels.rs
@@ -0,0 +1,311 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines basic arithmetic kernels for `PrimitiveArrays`.
+//!
+//! These kernels can leverage SIMD if available on your system.  Currently no 
runtime
+//! detection is provided; you should enable the specific SIMD intrinsics using
+//! `RUSTFLAGS="-C target-feature=+avx2"` for example.  See the documentation
+//! [here](https://doc.rust-lang.org/stable/std/arch/) for more information.
+
+use std::mem;
+use std::ops::{Add, Div, Mul, Sub};
+use std::slice::from_raw_parts_mut;
+use std::sync::Arc;
+
+use num::Zero;
+
+use crate::array::*;
+use crate::array_data::ArrayData;
+use crate::buffer::MutableBuffer;
+use crate::builder::PrimitiveBuilder;
+use crate::compute::util::apply_bin_op_to_option_bitmap;
+use crate::datatypes;
+use crate::error::{ArrowError, Result};
+
+/// Helper function to perform math lambda function on values from two arrays. 
If either
+/// left or right value is null then the output value is also null, so `1 + 
null` is
+/// `null`.
+pub fn math_op<T, F>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+    op: F,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    F: Fn(T::Native, T::Native) -> Result<T::Native>,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform math operation on arrays of different 
length".to_string(),
+        ));
+    }
+    let mut b = PrimitiveBuilder::<T>::new(left.len());
+    for i in 0..left.len() {
+        let index = i;
+        if left.is_null(i) || right.is_null(i) {
+            b.append_null()?;
+        } else {
+            b.append_value(op(left.value(index), right.value(index))?)?;
+        }
+    }
+    Ok(b.finish())
+}
+
+/// SIMD vectorized version of `math_op` above.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn simd_math_op<T, F>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+    op: F,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    T::Simd: Add<Output = T::Simd>
+        + Sub<Output = T::Simd>
+        + Mul<Output = T::Simd>
+        + Div<Output = T::Simd>,
+    F: Fn(T::Simd, T::Simd) -> T::Simd,
+{
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform math operation on arrays of different 
length".to_string(),
+        ));
+    }
+
+    let null_bit_buffer = apply_bin_op_to_option_bitmap(
+        left.data().null_bitmap(),
+        right.data().null_bitmap(),
+        |a, b| a & b,
+    )?;
+
+    let lanes = T::lanes();
+    let buffer_size = left.len() * mem::size_of::<T::Native>();
+    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
+
+    for i in (0..left.len()).step_by(lanes) {
+        let simd_left = T::load(left.value_slice(i, lanes));
+        let simd_right = T::load(right.value_slice(i, lanes));
+        let simd_result = T::bin_op(simd_left, simd_right, &op);
+
+        let result_slice: &mut [T::Native] = unsafe {
+            from_raw_parts_mut(
+                (result.data_mut().as_mut_ptr() as *mut T::Native).offset(i as 
isize),
+                lanes,
+            )
+        };
+        T::write(simd_result, result_slice);
+    }
+
+    let data = ArrayData::new(
+        T::get_data_type(),
+        left.len(),
+        None,
+        null_bit_buffer,
+        left.offset(),
+        vec![result.freeze()],
+        vec![],
+    );
+    Ok(PrimitiveArray::<T>::from(Arc::new(data)))
+}
+
+/// Perform `left + right` operation on two arrays. If either left or right 
value is null
+/// then the result is also null.
+pub fn add<T>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    T::Native: Add<Output = T::Native>
+        + Sub<Output = T::Native>
+        + Mul<Output = T::Native>
+        + Div<Output = T::Native>
+        + Zero,
+{
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    return simd_math_op(&left, &right, |a, b| a + b);
+
+    #[allow(unreachable_code)]
+    math_op(left, right, |a, b| Ok(a + b))
+}
+
+/// Perform `left - right` operation on two arrays. If either left or right 
value is null
+/// then the result is also null.
+pub fn subtract<T>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    T::Native: Add<Output = T::Native>
+        + Sub<Output = T::Native>
+        + Mul<Output = T::Native>
+        + Div<Output = T::Native>
+        + Zero,
+{
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    return simd_math_op(&left, &right, |a, b| a - b);
+
+    #[allow(unreachable_code)]
+    math_op(left, right, |a, b| Ok(a - b))
+}
+
+/// Perform `left * right` operation on two arrays. If either left or right 
value is null
+/// then the result is also null.
+pub fn multiply<T>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    T::Native: Add<Output = T::Native>
+        + Sub<Output = T::Native>
+        + Mul<Output = T::Native>
+        + Div<Output = T::Native>
+        + Zero,
+{
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    return simd_math_op(&left, &right, |a, b| a * b);
+
+    #[allow(unreachable_code)]
+    math_op(left, right, |a, b| Ok(a * b))
+}
+
+/// Perform `left / right` operation on two arrays. If either left or right 
value is null
+/// then the result is also null. If any right hand value is zero then the 
result of this
+/// operation will be `Err(ArrowError::DivideByZero)`.
+pub fn divide<T>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+    T: datatypes::ArrowNumericType,
+    T::Native: Add<Output = T::Native>
+        + Sub<Output = T::Native>
+        + Mul<Output = T::Native>
+        + Div<Output = T::Native>
+        + Zero,
+{
+    math_op(left, right, |a, b| {
+        if b.is_zero() {
+            Err(ArrowError::DivideByZero)
+        } else {
+            Ok(a / b)
+        }
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::array::Int32Array;
+
+    #[test]
+    fn test_primitive_array_add() {
+        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
+        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
+        let c = add(&a, &b).unwrap();
+        assert_eq!(11, c.value(0));
+        assert_eq!(13, c.value(1));
+        assert_eq!(15, c.value(2));
+        assert_eq!(17, c.value(3));
+        assert_eq!(17, c.value(4));
+    }
+
+    #[test]
+    fn test_primitive_array_add_mismatched_length() {
+        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
+        let b = Int32Array::from(vec![6, 7, 8]);
+        let e = add(&a, &b)
+            .err()
+            .expect("should have failed due to different lengths");
+        assert_eq!(
+            "ComputeError(\"Cannot perform math operation on arrays of 
different length\")",
+            format!("{:?}", e)
+        );
+    }
+
+    #[test]
+    fn test_primitive_array_subtract() {
+        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+        let b = Int32Array::from(vec![5, 4, 3, 2, 1]);
+        let c = subtract(&a, &b).unwrap();
+        assert_eq!(-4, c.value(0));
+        assert_eq!(-2, c.value(1));
+        assert_eq!(0, c.value(2));
+        assert_eq!(2, c.value(3));
+        assert_eq!(4, c.value(4));
+    }
+
+    #[test]
+    fn test_primitive_array_multiply() {
+        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
+        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
+        let c = multiply(&a, &b).unwrap();
+        assert_eq!(30, c.value(0));
+        assert_eq!(42, c.value(1));
+        assert_eq!(56, c.value(2));
+        assert_eq!(72, c.value(3));
+        assert_eq!(72, c.value(4));
+    }
+
+    #[test]
+    fn test_primitive_array_divide() {
+        let a = Int32Array::from(vec![15, 15, 8, 1, 9]);
+        let b = Int32Array::from(vec![5, 6, 8, 9, 1]);
+        let c = divide(&a, &b).unwrap();
+        assert_eq!(3, c.value(0));
+        assert_eq!(2, c.value(1));
+        assert_eq!(1, c.value(2));
+        assert_eq!(0, c.value(3));
+        assert_eq!(9, c.value(4));
+    }
+
+    #[test]
+    fn test_primitive_array_divide_by_zero() {
+        let a = Int32Array::from(vec![15]);
+        let b = Int32Array::from(vec![0]);
+        assert_eq!(
+            ArrowError::DivideByZero,
+            divide(&a, &b).err().expect("divide by zero should fail")
+        );
+    }
+
+    #[test]
+    fn test_primitive_array_divide_f64() {
+        let a = Float64Array::from(vec![15.0, 15.0, 8.0]);
+        let b = Float64Array::from(vec![5.0, 6.0, 8.0]);
+        let c = divide(&a, &b).unwrap();
+        assert_eq!(3.0, c.value(0));
+        assert_eq!(2.5, c.value(1));
+        assert_eq!(1.0, c.value(2));
+    }
+
+    #[test]
+    fn test_primitive_array_add_with_nulls() {
+        let a = Int32Array::from(vec![Some(5), None, Some(7), None]);
+        let b = Int32Array::from(vec![None, None, Some(6), Some(7)]);
+        let c = add(&a, &b).unwrap();
+        assert_eq!(true, c.is_null(0));
+        assert_eq!(true, c.is_null(1));
+        assert_eq!(false, c.is_null(2));
+        assert_eq!(true, c.is_null(3));
+        assert_eq!(13, c.value(2));
+    }
+
+}
diff --git a/rust/arrow/src/compute/array_ops.rs 
b/rust/arrow/src/compute/array_ops.rs
index 1a2d5fa..0d6ccbe 100644
--- a/rust/arrow/src/compute/array_ops.rs
+++ b/rust/arrow/src/compute/array_ops.rs
@@ -17,120 +17,12 @@
 
 //! Defines primitive computations on arrays, e.g. addition, equality, boolean 
logic.
 
-use std::ops::{Add, Div, Mul, Sub};
-
-use num::Zero;
+use std::ops::Add;
 
 use crate::array::{Array, BooleanArray, PrimitiveArray};
-use crate::builder::PrimitiveBuilder;
-use crate::datatypes;
 use crate::datatypes::ArrowNumericType;
 use crate::error::{ArrowError, Result};
 
-/// Perform `left + right` operation on two arrays. If either left or right 
value is null
-/// then the result is also null.
-pub fn add<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    math_op(left, right, |a, b| Ok(a + b))
-}
-
-/// Perform `left - right` operation on two arrays. If either left or right 
value is null
-/// then the result is also null.
-pub fn subtract<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    math_op(left, right, |a, b| Ok(a - b))
-}
-
-/// Perform `left * right` operation on two arrays. If either left or right 
value is null
-/// then the result is also null.
-pub fn multiply<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    math_op(left, right, |a, b| Ok(a * b))
-}
-
-/// Perform `left / right` operation on two arrays. If either left or right 
value is null
-/// then the result is also null. If any right hand value is zero then the 
result of this
-/// operation will be `Err(ArrowError::DivideByZero)`.
-pub fn divide<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    math_op(left, right, |a, b| {
-        if b.is_zero() {
-            Err(ArrowError::DivideByZero)
-        } else {
-            Ok(a / b)
-        }
-    })
-}
-
-/// Helper function to perform math lambda function on values from two arrays. 
If either
-/// left or right value is null then the output value is also null, so `1 + 
null` is
-/// `null`.
-fn math_op<T, F>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-    op: F,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    F: Fn(T::Native, T::Native) -> Result<T::Native>,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different 
length".to_string(),
-        ));
-    }
-    let mut b = PrimitiveBuilder::<T>::new(left.len());
-    for i in 0..left.len() {
-        let index = i;
-        if left.is_null(i) || right.is_null(i) {
-            b.append_null()?;
-        } else {
-            b.append_value(op(left.value(index), right.value(index))?)?;
-        }
-    }
-    Ok(b.finish())
-}
-
 /// Returns the minimum value in the array, according to the natural order.
 pub fn min<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
 where
@@ -318,99 +210,6 @@ mod tests {
     use crate::array::{Float64Array, Int32Array};
 
     #[test]
-    fn test_primitive_array_add() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
-        let c = add(&a, &b).unwrap();
-        assert_eq!(11, c.value(0));
-        assert_eq!(13, c.value(1));
-        assert_eq!(15, c.value(2));
-        assert_eq!(17, c.value(3));
-        assert_eq!(17, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_add_mismatched_length() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8]);
-        let e = add(&a, &b)
-            .err()
-            .expect("should have failed due to different lengths");
-        assert_eq!(
-            "ComputeError(\"Cannot perform math operation on arrays of 
different length\")",
-            format!("{:?}", e)
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_subtract() {
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![5, 4, 3, 2, 1]);
-        let c = subtract(&a, &b).unwrap();
-        assert_eq!(-4, c.value(0));
-        assert_eq!(-2, c.value(1));
-        assert_eq!(0, c.value(2));
-        assert_eq!(2, c.value(3));
-        assert_eq!(4, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_multiply() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
-        let c = multiply(&a, &b).unwrap();
-        assert_eq!(30, c.value(0));
-        assert_eq!(42, c.value(1));
-        assert_eq!(56, c.value(2));
-        assert_eq!(72, c.value(3));
-        assert_eq!(72, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_divide() {
-        let a = Int32Array::from(vec![15, 15, 8, 1, 9]);
-        let b = Int32Array::from(vec![5, 6, 8, 9, 1]);
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(3, c.value(0));
-        assert_eq!(2, c.value(1));
-        assert_eq!(1, c.value(2));
-        assert_eq!(0, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_divide_by_zero() {
-        let a = Int32Array::from(vec![15]);
-        let b = Int32Array::from(vec![0]);
-        assert_eq!(
-            ArrowError::DivideByZero,
-            divide(&a, &b).err().expect("divide by zero should fail")
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_divide_f64() {
-        let a = Float64Array::from(vec![15.0, 15.0, 8.0]);
-        let b = Float64Array::from(vec![5.0, 6.0, 8.0]);
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(3.0, c.value(0));
-        assert_eq!(2.5, c.value(1));
-        assert_eq!(1.0, c.value(2));
-    }
-
-    #[test]
-    fn test_primitive_array_add_with_nulls() {
-        let a = Int32Array::from(vec![Some(5), None, Some(7), None]);
-        let b = Int32Array::from(vec![None, None, Some(6), Some(7)]);
-        let c = add(&a, &b).unwrap();
-        assert_eq!(true, c.is_null(0));
-        assert_eq!(true, c.is_null(1));
-        assert_eq!(false, c.is_null(2));
-        assert_eq!(true, c.is_null(3));
-        assert_eq!(13, c.value(2));
-    }
-
-    #[test]
     fn test_primitive_array_sum() {
         let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
         assert_eq!(15, sum(&a).unwrap());
diff --git a/rust/arrow/src/compute/boolean_kernels.rs 
b/rust/arrow/src/compute/boolean_kernels.rs
index ededea9..f95d3d6 100644
--- a/rust/arrow/src/compute/boolean_kernels.rs
+++ b/rust/arrow/src/compute/boolean_kernels.rs
@@ -17,8 +17,10 @@
 
 //! Defines boolean kernels on Arrow `BooleanArray`'s, e.g. `AND`, `OR` and 
`NOT`.
 //!
-//! Kernels support SIMD using [static CPU feature 
detection](https://doc.rust-lang.org/stable/std/arch/#static-cpu-feature-detection)
-//! .
+//! These kernels can leverage SIMD if available on your system.  Currently no 
runtime
+//! detection is provided; you should enable the specific SIMD intrinsics using
+//! `RUSTFLAGS="-C target-feature=+avx2"` for example.  See the documentation
+//! [here](https://doc.rust-lang.org/stable/std/arch/) for more information.
 
 use std::sync::Arc;
 
diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs
index 5aee2b3..612ffcf 100644
--- a/rust/arrow/src/compute/mod.rs
+++ b/rust/arrow/src/compute/mod.rs
@@ -17,10 +17,12 @@
 
 //! Computation kernels on Arrow Arrays
 
+pub mod arithmetic_kernels;
 pub mod array_ops;
 pub mod boolean_kernels;
 
 mod util;
 
+pub use self::arithmetic_kernels::*;
 pub use self::array_ops::*;
 pub use self::boolean_kernels::*;
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 4dc55d6..3baa5b9 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -23,9 +23,11 @@
 
 use std::fmt;
 use std::mem::size_of;
+use std::ops::{Add, Div, Mul, Sub};
 use std::slice::from_raw_parts;
 use std::str::FromStr;
 
+use packed_simd::*;
 use serde_derive::{Deserialize, Serialize};
 use serde_json::{json, Value};
 
@@ -155,18 +157,83 @@ make_type!(Float32Type, f32, DataType::Float32, 32, 
0.0f32);
 make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
 
 /// A subtype of primitive type that represents numeric values.
-pub trait ArrowNumericType: ArrowPrimitiveType {}
-
-impl ArrowNumericType for Int8Type {}
-impl ArrowNumericType for Int16Type {}
-impl ArrowNumericType for Int32Type {}
-impl ArrowNumericType for Int64Type {}
-impl ArrowNumericType for UInt8Type {}
-impl ArrowNumericType for UInt16Type {}
-impl ArrowNumericType for UInt32Type {}
-impl ArrowNumericType for UInt64Type {}
-impl ArrowNumericType for Float32Type {}
-impl ArrowNumericType for Float64Type {}
+///
+/// SIMD operations are defined in this trait if available on the target 
system.
+pub trait ArrowNumericType: ArrowPrimitiveType
+where
+    Self::Simd: Add<Output = Self::Simd>
+        + Sub<Output = Self::Simd>
+        + Mul<Output = Self::Simd>
+        + Div<Output = Self::Simd>,
+{
+    /// Defines the SIMD type that should be used for this numeric type
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    type Simd;
+
+    /// The number of SIMD lanes available
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    fn lanes() -> usize;
+
+    /// Loads a slice into a SIMD register
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    fn load(slice: &[Self::Native]) -> Self::Simd;
+
+    /// Performs a SIMD binary operation
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
+        left: Self::Simd,
+        right: Self::Simd,
+        op: F,
+    ) -> Self::Simd;
+
+    /// Writes a SIMD result back to a slice
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    fn write(simd_result: Self::Simd, slice: &mut [Self::Native]);
+}
+
+macro_rules! make_numeric_type {
+    ($impl_ty:ty, $native_ty:ty, $simd_ty:ident) => {
+        impl ArrowNumericType for $impl_ty {
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            type Simd = $simd_ty;
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            fn lanes() -> usize {
+                $simd_ty::lanes()
+            }
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            fn load(slice: &[$native_ty]) -> $simd_ty {
+                unsafe { $simd_ty::from_slice_unaligned_unchecked(slice) }
+            }
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            fn bin_op<F: Fn($simd_ty, $simd_ty) -> $simd_ty>(
+                left: $simd_ty,
+                right: $simd_ty,
+                op: F,
+            ) -> $simd_ty {
+                op(left, right)
+            }
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            fn write(simd_result: $simd_ty, slice: &mut [$native_ty]) {
+                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) 
};
+            }
+        }
+    };
+}
+
+make_numeric_type!(Int8Type, i8, i8x64);
+make_numeric_type!(Int16Type, i16, i16x32);
+make_numeric_type!(Int32Type, i32, i32x16);
+make_numeric_type!(Int64Type, i64, i64x8);
+make_numeric_type!(UInt8Type, u8, u8x64);
+make_numeric_type!(UInt16Type, u16, u16x32);
+make_numeric_type!(UInt32Type, u32, u32x16);
+make_numeric_type!(UInt64Type, u64, u64x8);
+make_numeric_type!(Float32Type, f32, f32x16);
+make_numeric_type!(Float64Type, f64, f64x8);
 
 /// Allows conversion from supported Arrow types to a byte slice.
 pub trait ToByteSlice {
diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs
index 23bc3d5..89ebd95 100644
--- a/rust/arrow/src/util/bit_util.rs
+++ b/rust/arrow/src/util/bit_util.rs
@@ -121,9 +121,9 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
 
 /// Performs SIMD bitwise binary operations.
 ///
-/// Note that each slice should be 64 bytes and it is the callers 
responsibility to ensure that
-/// this is the case.  If passed slices larger than 64 bytes the operation 
will only be performed
-/// on the first 64 bytes.  Slices less than 64 bytes will panic.
+/// Note that each slice should be 64 bytes and it is the callers 
responsibility to ensure
+/// that this is the case.  If passed slices larger than 64 bytes the 
operation will only
+/// be performed on the first 64 bytes.  Slices less than 64 bytes will panic.
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut 
[u8], op: F)
 where
diff --git a/rust/datafusion/examples/csv_sql.rs 
b/rust/datafusion/examples/csv_sql.rs
index 40959cb..86f0f7f 100644
--- a/rust/datafusion/examples/csv_sql.rs
+++ b/rust/datafusion/examples/csv_sql.rs
@@ -28,7 +28,8 @@ use arrow::datatypes::{DataType, Field, Schema};
 use datafusion::execution::context::ExecutionContext;
 use datafusion::execution::datasource::CsvDataSource;
 
-/// This example demonstrates executing a simple query against an Arrow data 
source and fetching results
+/// This example demonstrates executing a simple query against an Arrow data 
source and
+/// fetching results
 fn main() {
     // create local execution context
     let mut ctx = ExecutionContext::new();
diff --git a/rust/datafusion/src/dfparser.rs b/rust/datafusion/src/dfparser.rs
index 7abbd8d..f438c03 100644
--- a/rust/datafusion/src/dfparser.rs
+++ b/rust/datafusion/src/dfparser.rs
@@ -17,8 +17,8 @@
 
 //! SQL Parser
 //!
-//! Note that most SQL parsing is now delegated to the sqlparser crate, which 
handles ANSI SQL but
-//! this module contains DataFusion-specific SQL extensions.
+//! Note that most SQL parsing is now delegated to the sqlparser crate, which 
handles ANSI
+//! SQL but this module contains DataFusion-specific SQL extensions.
 
 use sqlparser::dialect::*;
 use sqlparser::sqlast::*;
diff --git a/rust/datafusion/src/execution/aggregate.rs 
b/rust/datafusion/src/execution/aggregate.rs
index d8cbfdf..127a1ab 100644
--- a/rust/datafusion/src/execution/aggregate.rs
+++ b/rust/datafusion/src/execution/aggregate.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Execution of a simple aggregate relation containing MIN, MAX, COUNT, SUM 
aggregate functions
-//! with optional GROUP BY columns
+//! Execution of a simple aggregate relation containing MIN, MAX, COUNT, SUM 
aggregate
+//! functions with optional GROUP BY columns
 
 use std::cell::RefCell;
 use std::rc::Rc;
@@ -63,8 +63,8 @@ impl AggregateRelation {
     }
 }
 
-/// Enumeration of types that can be used in a GROUP BY expression (all 
primitives except for
-/// floating point numerics)
+/// Enumeration of types that can be used in a GROUP BY expression (all 
primitives except
+/// for floating point numerics)
 #[derive(Debug, PartialEq, Eq, Hash, Clone)]
 enum GroupByScalar {
     UInt8(u8),
@@ -692,7 +692,8 @@ macro_rules! group_array_from_map_entries {
     }};
 }
 
-/// Create array from `value` attribute in map entry (representing an aggregate scalar value)
+/// Create array from `value` attribute in map entry (representing an aggregate scalar
+/// value)
 macro_rules! aggr_array_from_map_entries {
     ($BUILDER:ident, $TY:ident, $ENTRIES:expr, $COL_INDEX:expr) => {{
         let mut builder = $BUILDER::new($ENTRIES.len());
@@ -803,9 +804,10 @@ impl AggregateRelation {
     }
 
     fn with_group_by(&mut self) -> Result<Option<RecordBatch>> {
-        //NOTE this whole method is currently very inefficient with too many per-row operations
-        // involving pattern matching and downcasting ... I'm sure this can be re-implemented in
-        // a much more efficient way that takes better advantage of Arrow
+        //NOTE this whole method is currently very inefficient with too many per-row
+        // operations involving pattern matching and downcasting ... I'm sure this
+        // can be re-implemented in a much more efficient way that takes better
+        // advantage of Arrow
 
         // create map to store aggregate results
         let mut map: FnvHashMap<Vec<GroupByScalar>, Rc<RefCell<AccumulatorSet>>> =
@@ -877,7 +879,8 @@ impl AggregateRelation {
                     })
                     .collect::<Result<Vec<GroupByScalar>>>()?;
 
-                //TODO: find more elegant way to write this instead of hacking around ownership issues
+                //TODO: find more elegant way to write this instead of hacking around
+                // ownership issues
 
                 let updated = match map.get(&key) {
                     Some(entry) => {
diff --git a/rust/datafusion/src/execution/expression.rs b/rust/datafusion/src/execution/expression.rs
index fa6201a..48a90a3 100644
--- a/rust/datafusion/src/execution/expression.rs
+++ b/rust/datafusion/src/execution/expression.rs
@@ -298,12 +298,13 @@ pub fn compile_scalar_expr(
 ) -> Result<RuntimeExpr> {
     match expr {
         &Expr::Literal(ref value) => match value {
-            //NOTE: this is a temporary hack .. due to the way expressions like 'a > 1' are
-            // evaluated, currently the left and right are evaluated separately and must result
-            // in arrays and then the '>' operator is evaluated against the two arrays. This works
-            // but is dumb ... I intend to optimize this soon to add special handling for
-            // binary expressions that involve literal values to avoid creating arrays of literals
-            // filed as https://github.com/andygrove/datafusion/issues/191
+            //NOTE: this is a temporary hack .. due to the way expressions like 'a > 1'
+            // are evaluated, currently the left and right are evaluated
+            // separately and must result in arrays and then the '>' operator
+            // is evaluated against the two arrays. This works but is dumb ...
+            // I intend to optimize this soon to add special handling for
+            // binary expressions that involve literal values to avoid creating arrays of
+            // literals filed as https://github.com/andygrove/datafusion/issues/191
             ScalarValue::Int8(n) => literal_array!(n, Int8Array, Int8),
             ScalarValue::Int16(n) => literal_array!(n, Int16Array, Int16),
             ScalarValue::Int32(n) => literal_array!(n, Int32Array, Int32),
diff --git a/rust/datafusion/src/execution/filter.rs b/rust/datafusion/src/execution/filter.rs
index ba20dca..32c1628 100644
--- a/rust/datafusion/src/execution/filter.rs
+++ b/rust/datafusion/src/execution/filter.rs
@@ -192,7 +192,8 @@ fn filter(array: &Arc<Array>, filter: &BooleanArray) -> Result<ArrayRef> {
             Ok(Arc::new(builder.finish()))
         }
         DataType::Utf8 => {
-            //TODO: this is inefficient and we should improve the Arrow impl to help make this more concise
+            //TODO: this is inefficient and we should improve the Arrow impl to help make
+            // this more concise
             let b = a.as_any().downcast_ref::<BinaryArray>().unwrap();
             let mut values: Vec<String> = Vec::with_capacity(b.len());
             for i in 0..b.len() {
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
index 6ea2a36..a93872c 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/lib.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! DataFusion is a modern distributed compute platform implemented in Rust that uses Apache Arrow
-//! as the memory model
+//! DataFusion is a modern distributed compute platform implemented in Rust that uses
+//! Apache Arrow as the memory model
 
 extern crate arrow;
 #[macro_use]
diff --git a/rust/datafusion/src/logicalplan.rs b/rust/datafusion/src/logicalplan.rs
index 7dd4602..5e3b17e 100644
--- a/rust/datafusion/src/logicalplan.rs
+++ b/rust/datafusion/src/logicalplan.rs
@@ -316,8 +316,8 @@ impl fmt::Debug for Expr {
     }
 }
 
-/// The LogicalPlan represents different types of relations (such as Projection, Selection, etc) and
-/// can be created by the SQL query planner and the DataFrame API.
+/// The LogicalPlan represents different types of relations (such as Projection,
+/// Selection, etc) and can be created by the SQL query planner and the DataFrame API.
 #[derive(Serialize, Deserialize, Clone)]
 pub enum LogicalPlan {
     /// A Projection (essentially a SELECT with an expression list)
diff --git a/rust/datafusion/src/sqlplanner.rs b/rust/datafusion/src/sqlplanner.rs
index fc8048f..a45d5a4 100644
--- a/rust/datafusion/src/sqlplanner.rs
+++ b/rust/datafusion/src/sqlplanner.rs
@@ -313,7 +313,8 @@ impl SqlToRel {
                             .map(|a| self.sql_to_rex(a, schema))
                             .collect::<Result<Vec<Expr>>>()?;
 
-                        // return type is same as the argument type for these aggregate functions
+                        // return type is same as the argument type for these aggregate
+                        // functions
                         let return_type = rex_args[0].get_type(schema).clone();
 
                         Ok(Expr::AggregateFunction {
@@ -326,7 +327,8 @@ impl SqlToRel {
                         let rex_args = args
                             .iter()
                             .map(|a| match a {
-                                // this feels hacky but translate COUNT(1)/COUNT(*) to COUNT(first_column)
+                                // this feels hacky but translate COUNT(1)/COUNT(*) to
+                                // COUNT(first_column)
                                 ASTNode::SQLValue(sqlparser::sqlast::Value::Long(1)) => {
                                     Ok(Expr::Column(0))
                                 }

[arrow] branch master updated: ARROW-4196: [Rust] Add explicit SIMD vectorization for arithmetic ops in "array_ops"

Reply via email to