Re: [PR] Perf: Add prefix compare for inlined compare and change use of inline_value to inline it to a u128 [arrow-rs]

via GitHub Wed, 25 Jun 2025 04:02:34 -0700


Dandandan commented on code in PR #7748:
URL: https://github.com/apache/arrow-rs/pull/7748#discussion_r2166438292



##########
arrow-ord/src/cmp.rs:
##########
@@ -642,17 +669,42 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
-        let l_view = unsafe { left.views().get_unchecked(left_idx) };
-        let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 
l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 
r_len) };
-        return l_bytes.cmp(r_bytes);
+        // Directly load the 16-byte view as an u128 (little-endian)
+        let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) };
+        let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) };
+
+        // The lower 32 bits encode the length (little-endian),
+        // the upper 96 bits hold the actual data
+        let l_len = l_bits as u32;
+        let r_len = r_bits as u32;
+
+        // Remove the length bits, leaving only the data
+        let l_data = l_bits >> 32;
+        let r_data = r_bits >> 32;
+
+        // The data is stored in little-endian order. To compare 
lexicographically,
+        // convert to big-endian:
+        let l_be = l_data.swap_bytes();
+        let r_be = r_data.swap_bytes();
+
+        // Compare only the first min_len bytes
+        let min_len = l_len.min(r_len);
+        // We have all 12 bytes in the high bits, but only want the top min_len
+        let shift = (12 - min_len) * 8;
+        let l_partial = l_be >> shift;
+        let r_partial = r_be >> shift;
+        if l_partial < r_partial {

Review Comment:
   this can be written as `l_partial.cmp(r_partial).then_with(|| { 
l_len.cmp(&r_len) })



##########
arrow-ord/src/cmp.rs:
##########
@@ -642,17 +669,42 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
-        let l_view = unsafe { left.views().get_unchecked(left_idx) };
-        let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 
l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 
r_len) };
-        return l_bytes.cmp(r_bytes);
+        // Directly load the 16-byte view as an u128 (little-endian)
+        let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) };
+        let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) };
+
+        // The lower 32 bits encode the length (little-endian),
+        // the upper 96 bits hold the actual data
+        let l_len = l_bits as u32;
+        let r_len = r_bits as u32;
+
+        // Remove the length bits, leaving only the data
+        let l_data = l_bits >> 32;
+        let r_data = r_bits >> 32;
+
+        // The data is stored in little-endian order. To compare 
lexicographically,
+        // convert to big-endian:
+        let l_be = l_data.swap_bytes();
+        let r_be = r_data.swap_bytes();
+
+        // Compare only the first min_len bytes
+        let min_len = l_len.min(r_len);
+        // We have all 12 bytes in the high bits, but only want the top min_len
+        let shift = (12 - min_len) * 8;
+        let l_partial = l_be >> shift;
+        let r_partial = r_be >> shift;
+        if l_partial < r_partial {

Review Comment:
   this can be written as `l_partial.cmp(r_partial).then_with(|| { 
l_len.cmp(&r_len) })`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Perf: Add prefix compare for inlined compare and change use of inline_value to inline it to a u128 [arrow-rs]

Reply via email to