This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch create_hashes_primitive
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit b6ccfa450b86ed31814aaaa1df9a2c52a7513fcb
Author: Daniël Heres <[email protected]>
AuthorDate: Sat Jul 1 09:47:08 2023 +0200

    Only rehash col >=1
---
 datafusion/physical-expr/src/hash_utils.rs | 68 ++++++++++++++++--------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/datafusion/physical-expr/src/hash_utils.rs b/datafusion/physical-expr/src/hash_utils.rs
index de3526992f..e3cb891902 100644
--- a/datafusion/physical-expr/src/hash_utils.rs
+++ b/datafusion/physical-expr/src/hash_utils.rs
@@ -88,31 +88,33 @@ fn hash_array_primitve<T>(
     array: &PrimitiveArray<T>,
     random_state: &RandomState,
     hashes_buffer: &mut [u64],
-    multi_col: bool,
+    rehash: bool,
 ) where
     T: ArrowPrimitiveType,
     <T as arrow_array::ArrowPrimitiveType>::Native: HashValue,
 {
     if array.null_count() == 0 {
-        if multi_col {
-            for (hash, &val) in 
hashes_buffer.iter_mut().zip(array.values().iter()) {
-                *hash = combine_hashes(val.hash_one(&random_state), *hash);
+        if rehash {
+            for (hash, &value) in 
hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = combine_hashes(value.hash_one(&random_state), *hash);
             }
         } else {
-            for (hash, &val) in 
hashes_buffer.iter_mut().zip(array.values().iter()) {
-                *hash = val.hash_one(&random_state);
+            for (hash, &value) in 
hashes_buffer.iter_mut().zip(array.values().iter()) {
+                *hash = value.hash_one(&random_state);
             }
         }
-    } else if multi_col {
+    } else if rehash {
         for (i, hash) in hashes_buffer.iter_mut().enumerate() {
             if !array.is_null(i) {
-                *hash = combine_hashes(array.value(i).hash_one(random_state), 
*hash);
+                let value = unsafe {array.value_unchecked(i)};
+                *hash = combine_hashes(value.hash_one(random_state), *hash);
             }
         }
     } else {
         for (i, hash) in hashes_buffer.iter_mut().enumerate() {
             if !array.is_null(i) {
-                *hash = array.value(i).hash_one(random_state);
+                let value = unsafe {array.value_unchecked(i)};
+                *hash = value.hash_one(random_state);
             }
         }
     }
@@ -122,31 +124,35 @@ fn hash_array<T>(
     array: T,
     random_state: &RandomState,
     hashes_buffer: &mut [u64],
-    multi_col: bool,
+    rehash: bool,
 ) where
     T: ArrayAccessor,
     T::Item: HashValue,
 {
     if array.null_count() == 0 {
-        if multi_col {
+        if rehash {
             for (i, hash) in hashes_buffer.iter_mut().enumerate() {
-                *hash = combine_hashes(array.value(i).hash_one(random_state), 
*hash);
+                let value = unsafe {array.value_unchecked(i)};
+                *hash = combine_hashes(value.hash_one(random_state), *hash);
             }
         } else {
             for (i, hash) in hashes_buffer.iter_mut().enumerate() {
-                *hash = array.value(i).hash_one(random_state);
+                let value= unsafe {array.value_unchecked(i)};
+                *hash = value.hash_one(random_state);
             }
         }
-    } else if multi_col {
+    } else if rehash {
         for (i, hash) in hashes_buffer.iter_mut().enumerate() {
             if !array.is_null(i) {
-                *hash = combine_hashes(array.value(i).hash_one(random_state), 
*hash);
+                let value= unsafe {array.value_unchecked(i)};
+                *hash = combine_hashes(value.hash_one(random_state), *hash);
             }
         }
     } else {
         for (i, hash) in hashes_buffer.iter_mut().enumerate() {
             if !array.is_null(i) {
-                *hash = array.value(i).hash_one(random_state);
+                let value= unsafe {array.value_unchecked(i)};
+                *hash = value.hash_one(random_state);
             }
         }
     }
@@ -242,34 +248,32 @@ pub fn create_hashes<'a>(
     random_state: &RandomState,
     hashes_buffer: &'a mut Vec<u64>,
 ) -> Result<&'a mut Vec<u64>> {
-    // combine hashes with `combine_hashes` if we have more than 1 column
-
-    let multi_col = arrays.len() > 1;
-
-    for col in arrays {
+    for (i, col) in arrays.iter().enumerate() {
         let array = col.as_ref();
+        // combine hashes with `combine_hashes` for all columns besides the first
+        let rehash = i >= 1;
         downcast_primitive_array! {
-            array => hash_array_primitve(array, random_state, hashes_buffer, 
multi_col),
-            DataType::Null => hash_null(random_state, hashes_buffer, 
multi_col),
-            DataType::Boolean => hash_array(as_boolean_array(array)?, 
random_state, hashes_buffer, multi_col),
-            DataType::Utf8 => hash_array(as_string_array(array)?, 
random_state, hashes_buffer, multi_col),
-            DataType::LargeUtf8 => hash_array(as_largestring_array(array), 
random_state, hashes_buffer, multi_col),
-            DataType::Binary => 
hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, 
multi_col),
-            DataType::LargeBinary => 
hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, 
multi_col),
+            array => hash_array_primitve(array, random_state, hashes_buffer, 
rehash),
+            DataType::Null => hash_null(random_state, hashes_buffer, rehash),
+            DataType::Boolean => hash_array(as_boolean_array(array)?, 
random_state, hashes_buffer, rehash),
+            DataType::Utf8 => hash_array(as_string_array(array)?, 
random_state, hashes_buffer, rehash),
+            DataType::LargeUtf8 => hash_array(as_largestring_array(array), 
random_state, hashes_buffer, rehash),
+            DataType::Binary => 
hash_array(as_generic_binary_array::<i32>(array)?, random_state, hashes_buffer, 
rehash),
+            DataType::LargeBinary => 
hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer, 
rehash),
             DataType::FixedSizeBinary(_) => {
                 let array: &FixedSizeBinaryArray = 
array.as_any().downcast_ref().unwrap();
-                hash_array(array, random_state, hashes_buffer, multi_col)
+                hash_array(array, random_state, hashes_buffer, rehash)
             }
             DataType::Decimal128(_, _) => {
                 let array = as_primitive_array::<Decimal128Type>(array)?;
-                hash_array_primitve(array, random_state, hashes_buffer, 
multi_col)
+                hash_array_primitve(array, random_state, hashes_buffer, rehash)
             }
             DataType::Decimal256(_, _) => {
                 let array = as_primitive_array::<Decimal256Type>(array)?;
-                hash_array_primitve(array, random_state, hashes_buffer, 
multi_col)
+                hash_array_primitve(array, random_state, hashes_buffer, rehash)
             }
             DataType::Dictionary(_, _) => downcast_dictionary_array! {
-                array => hash_dictionary(array, random_state, hashes_buffer, 
multi_col)?,
+                array => hash_dictionary(array, random_state, hashes_buffer, 
rehash)?,
                 _ => unreachable!()
             }
             _ => {

Reply via email to