zhuqi-lucas commented on code in PR #7748:
URL: https://github.com/apache/arrow-rs/pull/7748#discussion_r2166540945


##########
arrow-array/src/array/byte_view_array.rs:
##########
@@ -537,17 +538,37 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         left_idx: usize,
         right: &GenericByteViewArray<T>,
         right_idx: usize,
-    ) -> std::cmp::Ordering {
+    ) -> Ordering {
         let l_view = left.views().get_unchecked(left_idx);
         let l_len = *l_view as u32;
 
         let r_view = right.views().get_unchecked(right_idx);
         let r_len = *r_view as u32;
 
         if l_len <= 12 && r_len <= 12 {
-            let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-            let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-            return l_data.cmp(r_data);
+            // Remove the length bits, leaving only the data
+            let l_data = *l_view >> 32;
+            let r_data = *r_view >> 32;
+
+            // The data is stored in little-endian order. To compare lexicographically,
+            // convert to big-endian:
+            let l_be = l_data.swap_bytes();
+            let r_be = r_data.swap_bytes();
+
+            // Compare only the first min_len bytes
+            let min_len = l_len.min(r_len);
+            // We have all 12 bytes in the high bits, but only want the top min_len
+            let shift = (12 - min_len) * 8;
+            let l_partial = l_be >> shift;
+            let r_partial = r_be >> shift;
+            if l_partial < r_partial {
+                return Ordering::Less;
+            } else if l_partial > r_partial {
+                return Ordering::Greater;
+            }
+
+            // If the prefixes are equal, the shorter one is considered smaller
+            return l_len.cmp(&r_len);
         }
 
         // one of the string is larger than 12 bytes,

Review Comment:
   Good point @Dandandan, I will change it to use the ByteView prefix.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to