zhuqi-lucas commented on code in PR #7860: URL: https://github.com/apache/arrow-rs/pull/7860#discussion_r2186703686
########## arrow-ord/src/sort.rs: ########## @@ -295,12 +295,81 @@ fn sort_bytes<T: ByteArrayType>( options: SortOptions, limit: Option<usize>, ) -> UInt32Array { - let mut valids = value_indices + // Note: Why do we use 4‑byte prefix? + // Compute the 4‑byte prefix in BE order, or left‑pad if shorter. + // Most byte‐sequences differ in their first few bytes, so by + // comparing up to 4 bytes as a single u32 we avoid the overhead + // of a full lexicographical compare for the vast majority of cases. + + // 1. Build a vector of (index, 64‑bit prefix) pairs + // The high 32 bits store the big‑endian u32 of the first up to 4 bytes, + // the low 32 bits store the original slice length. + // This is similar to inline_key_fast function in GenericByteViewArray. + let mut valids: Vec<(u32, u64)> = value_indices .into_iter() - .map(|index| (index, values.value(index as usize).as_ref())) - .collect::<Vec<(u32, &[u8])>>(); + .map(|idx| { + let slice: &[u8] = values.value(idx as usize).as_ref(); + let len = slice.len() as u32; + // Compute the 4‑byte prefix in BE order, or left‑pad if shorter + let prefix_u32 = if len >= 4 { + let raw = unsafe { std::ptr::read_unaligned(slice.as_ptr() as *const u32) }; + u32::from_be(raw) + } else { + let mut v = 0u32; + for &b in slice { + v = (v << 8) | (b as u32); + } + v << (8 * (4 - len)) + }; + // Pack into a single u64: (prefix << 32) | length + let prefix64 = ((prefix_u32 as u64) << 32) | (len as u64); Review Comment: Thank you @Dandandan for this good suggestion! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org