Dandandan commented on code in PR #9297:
URL: https://github.com/apache/arrow-rs/pull/9297#discussion_r2779476187


##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -191,67 +189,48 @@ impl BooleanBuffer {
     where
         F: FnMut(u64) -> u64,
     {
-        // try fast path for aligned input
-        if offset_in_bits & 0x7 == 0 {
-            // align to byte boundary
-            let aligned = &src.as_ref()[offset_in_bits / 8..];
-            if let Some(result) =
-                Self::try_from_aligned_bitwise_unary_op(aligned, len_in_bits, 
&mut op)
-            {
-                return result;
+        let end = offset_in_bits + len_in_bits;
+        // Align start and end to 64 bit (8 byte) boundaries if possible to 
allow using the
+        // optimized code path as much as possible.
+        let aligned_offset = offset_in_bits & !63;
+        let aligned_end_bytes = bit_util::ceil(end, 64) * 8;
+        let src_len = src.as_ref().len();
+        let slice_end = aligned_end_bytes.min(src_len);
+
+        let aligned_start = &src.as_ref()[aligned_offset / 8..slice_end];
+
+        let (prefix, aligned_u64s, suffix) = unsafe { 
aligned_start.as_ref().align_to::<u64>() };
+        match (prefix, suffix) {
+            ([], []) => {
+                // the buffer is word (64 bit) aligned, so use optimized Vec 
code.
+                let result_u64s: Vec<u64> = aligned_u64s.iter().map(|l| 
op(*l)).collect();
+                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 
64, len_in_bits);
             }
-        }
-
-        let chunks = BitChunks::new(src.as_ref(), offset_in_bits, len_in_bits);
-        let mut result = MutableBuffer::with_capacity(chunks.num_u64s() * 8);
-        for chunk in chunks.iter() {
-            // SAFETY: reserved enough capacity above, (exactly num_u64s()
-            // items) and we assume `BitChunks` correctly reports upper bound
-            unsafe {
-                result.push_unchecked(op(chunk));
+            ([], suffix) => {
+                let suffix = read_u64(suffix);
+                let result_u64s: Vec<u64> = aligned_u64s
+                    .iter()
+                    .cloned()
+                    .chain(std::iter::once(suffix))
+                    .map(&mut op)
+                    .collect();
+                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 
64, len_in_bits);
             }
-        }
-        if chunks.remainder_len() > 0 {
-            debug_assert!(result.capacity() >= result.len() + 8); // should 
not reallocate
-            // SAFETY: reserved enough capacity above, (exactly num_u64s()
-            // items) and we assume `BitChunks` correctly reports upper bound
-            unsafe {
-                result.push_unchecked(op(chunks.remainder_bits()));
-            }
-            // Just pushed one u64, which may have trailing zeros
-            result.truncate(chunks.num_bytes());
+            _ => {}
         }
 
-        BooleanBuffer {
-            buffer: Buffer::from(result),
-            bit_offset: 0,
-            bit_len: len_in_bits,
-        }
-    }
+        // align to byte boundaries

Review Comment:
   Ah I made the other path too unlikely by "aligning" input to 64 bits, let's 
add a case for this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to