Dandandan commented on code in PR #19344:
URL: https://github.com/apache/datafusion/pull/19344#discussion_r2622138461


##########
datafusion/common/src/hash_utils.rs:
##########
@@ -221,6 +221,86 @@ fn hash_array_primitive<T>(
     }
 }
 
+// TODO FIX: can't hash the view if it isn't inlined (otherwise it can have 
different offsets)
+
+/// Builds hash values for array views and writes them into `hashes_buffer`
+/// If `rehash==true` this combines the previous hash value in the buffer
+/// with the new hash using `combine_hashes`
+///
+/// TODO: make general for bytes views as well
+#[cfg(not(feature = "force_hash_collisions"))]
+fn hash_string_view_array(
+    array: &StringViewArray,
+    random_state: &RandomState,
+    hashes_buffer: &mut [u64],
+    rehash: bool,
+)
+{
+    assert_eq!(
+        hashes_buffer.len(),
+        array.len(),
+        "hashes_buffer and array should be of equal length"
+    );
+
+    let get_value = |v| {
+        let view_len = v as u32;
+        let view = ByteView::from(v);
+        let data = unsafe { 
array.data_buffers().get_unchecked(view.buffer_index as usize) };
+        let offset = view.offset as usize;
+        unsafe { data.get_unchecked(offset..offset + view_len as usize) }
+    };
+
+    if array.null_count() == 0 {
+        if rehash {
+            for (hash, &v) in 
hashes_buffer.iter_mut().zip(array.views().iter()) {
+                let view_len = v as u32;
+                // if the value is not inlined (length > 12), then we need to hash the 
referenced bytes instead of the view
+                if view_len > 12 {
+                    *hash = 
combine_hashes(get_value(v).hash_one(random_state), *hash);
+                } else {
+                    *hash = combine_hashes(v.hash_one(random_state), *hash);
+                }
+            }
+        } else {
+            for (hash, &v) in 
hashes_buffer.iter_mut().zip(array.views().iter()) {
+                let view_len = v as u32;
+                // if the value is not inlined (length > 12), then we need to hash the 
referenced bytes instead of the view
+                if view_len > 12 {

Review Comment:
   We could also eliminate this branch when the array has no data buffers at all.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to