zhuqi-lucas commented on code in PR #7860:
URL: https://github.com/apache/arrow-rs/pull/7860#discussion_r2249178046
##########
arrow-ord/src/sort.rs:
##########
@@ -295,12 +295,85 @@ fn sort_bytes<T: ByteArrayType>(
options: SortOptions,
limit: Option<usize>,
) -> UInt32Array {
- let mut valids = value_indices
+ // Note: Why do we use 4‑byte prefix?
+ // Compute the 4‑byte prefix in BE order, or left‑pad if shorter.
+ // Most byte‐sequences differ in their first few bytes, so by
+ // comparing up to 4 bytes as a single u32 we avoid the overhead
+ // of a full lexicographical compare for the vast majority of cases.
+
+ // 1. Build a vector of (index, prefix, length) tuples
+ let mut valids: Vec<(u32, u32, u64)> = value_indices
.into_iter()
- .map(|index| (index, values.value(index as usize).as_ref()))
- .collect::<Vec<(u32, &[u8])>>();
+ .map(|idx| {
+ let slice: &[u8] = values.value(idx as usize).as_ref();
+ let len = slice.len() as u64;
+ // Compute the 4‑byte prefix in BE order, or left‑pad if shorter
+ let prefix = if slice.len() >= 4 {
+ let raw = unsafe { std::ptr::read_unaligned(slice.as_ptr() as
*const u32) };
+ u32::from_be(raw)
+ } else {
+ let mut v = 0u32;
+ for &b in slice {
+ v = (v << 8) | (b as u32);
+ }
+ v << (8 * (4 - slice.len()))
+ };
+ (idx, prefix, len)
+ })
+ .collect();
- sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
+ // 2. compute the number of non-null entries to partially sort
+ let vlimit = match (limit, options.nulls_first) {
+ (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
+ _ => valids.len(),
+ };
+
+ // 3. Comparator: compare prefix, then (when both slices shorter than 4)
length, otherwise full slice
+ let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| {
+ let (ia, pa, la) = *a;
+ let (ib, pb, lb) = *b;
+ // 3.1 prefix (first 4 bytes)
+ let ord = pa.cmp(&pb);
+ if ord != Ordering::Equal {
+ return ord;
+ }
+ // 3.2 only if both slices had length < 4 (so prefix was padded)
+ // length compare only when prefix was padded (i.e. original length <
4)
+ if la < 4 || lb < 4 {
+ let ord = la.cmp(&lb);
+ if ord != Ordering::Equal {
+ return ord;
+ }
+ }
+ // 3.3 full lexicographical compare
+ let a_bytes: &[u8] = values.value(ia as usize).as_ref();
Review Comment:
Addressed 1 in latest PR that changed all to not checking bound, 5%
improvement for some cases.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]