This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d53df60565 feat: Optimize from_bitwise_binary_op with 64-bit alignment 
(#9441)
d53df60565 is described below

commit d53df605656d8012eca42e8ddffe165362a1a4cb
Author: Kunal <[email protected]>
AuthorDate: Fri Mar 20 01:44:01 2026 +0530

    feat: Optimize from_bitwise_binary_op with 64-bit alignment (#9441)
    
    # Which issue does this PR close?
    
    
    - Closes #9378
    
    # Rationale for this change
    
    the optimizations as listed in the issue description
    
    - Align to 8 bytes
    - Don't try to return a buffer with bit_offset 0 but round it to a
    multiple of 64
    - Use chunk_exact for the fallback path
    
    
    # What changes are included in this PR?
    
    When both inputs share the same sub-64-bit alignment (left_offset % 64
    == right_offset % 64), the optimized path is used. This covers the
    common cases (both offset 0, both sliced equally, etc.). The BitChunks
    fallback is retained only when the two offsets have different sub-64-bit
    alignment.
    
    # Are these changes tested?
    
    Yes the tests are changed and they are included
    
    # Are there any user-facing changes?
    
    
    Yes, this is a minor breaking change to from_bitwise_binary_op:
    
    - The returned BooleanBuffer may now have a non-zero offset (previously
    always 0)
    - The returned BooleanBuffer may have padding bits set outside the
    logical range in values()
    
    ---------
    
    Signed-off-by: Kunal Singh Dadhwal <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-buffer/src/buffer/boolean.rs | 230 +++++++++++++++++++++++++++++++------
 arrow-buffer/src/buffer/ops.rs     | 102 ++++++++++++++--
 2 files changed, 288 insertions(+), 44 deletions(-)

diff --git a/arrow-buffer/src/buffer/boolean.rs 
b/arrow-buffer/src/buffer/boolean.rs
index bae083b3b2..420bbf59f3 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -290,7 +290,8 @@ impl BooleanBuffer {
     ///   on the relevant bits; the input `u64` values may contain irrelevant 
bits
     ///   and may be processed differently on different endian architectures.
     /// * `op` may be called with input bits outside the requested range.
-    /// * The returned `BooleanBuffer` always has zero offset.
+    /// * Returned `BooleanBuffer` may have non zero offset
+    /// * Returned `BooleanBuffer` may have bits set outside the requested 
range
     ///
     /// # See Also
     /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a 
single input buffer.
@@ -305,19 +306,28 @@ impl BooleanBuffer {
     /// let result = BooleanBuffer::from_bitwise_binary_op(
     ///   &left, 0, &right, 0, 12, |a, b| a & b
     /// );
-    /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+    /// assert_eq!(result.len(), 12);
+    /// for i in 0..12 {
+    ///     assert_eq!(result.value(i), left.as_slice()[i / 8] >> (i % 8) & 1 
== 1
+    ///         && right.as_slice()[i / 8] >> (i % 8) & 1 == 1);
+    /// }
     /// ```
     ///
     /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte 
slices
     /// ```
-    /// # use arrow_buffer::BooleanBuffer;
+    /// # use arrow_buffer::{BooleanBuffer, bit_util};
     /// let left = [0b11001100u8, 0b10111010u8];
     /// let right = [0b10101010u8, 0b11011100u8];
     /// // OR of bits 4..16 from left and bits 0..12 from right
     /// let result = BooleanBuffer::from_bitwise_binary_op(
     ///  &left, 4, &right, 0, 12, |a, b| a | b
     /// );
-    /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+    /// assert_eq!(result.len(), 12);
+    /// for i in 0..12 {
+    ///     let l = bit_util::get_bit(&left, 4 + i);
+    ///     let r = bit_util::get_bit(&right, i);
+    ///     assert_eq!(result.value(i), l | r);
+    /// }
     /// ```
     pub fn from_bitwise_binary_op<F>(
         left: impl AsRef<[u8]>,
@@ -332,39 +342,74 @@ impl BooleanBuffer {
     {
         let left = left.as_ref();
         let right = right.as_ref();
-        // try fast path for aligned input
-        // If the underlying buffers are aligned to u64 we can apply the 
operation directly on the u64 slices
-        // to improve performance.
-        if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
-            // align to byte boundary
-            let left = &left[left_offset_in_bits / 8..];
-            let right = &right[right_offset_in_bits / 8..];
-
-            unsafe {
-                let (left_prefix, left_u64s, left_suffix) = 
left.align_to::<u64>();
-                let (right_prefix, right_u64s, right_suffix) = 
right.align_to::<u64>();
-                // if there is no prefix or suffix, both buffers are aligned 
and
-                // we can do the operation directly on u64s.
-                // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` 
when MSRV reaches 1.88.
-                // 
https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
-                if left_prefix.is_empty()
-                    && right_prefix.is_empty()
-                    && left_suffix.is_empty()
-                    && right_suffix.is_empty()
-                {
-                    let result_u64s = left_u64s
+
+        // When both offsets share the same sub-64-bit alignment, we can
+        // align both to 64-bit boundaries and zip u64s directly,
+        // avoiding BitChunks bit-shifting entirely.
+        if left_offset_in_bits % 64 == right_offset_in_bits % 64 {
+            let bit_offset = left_offset_in_bits % 64;
+            let left_end = left_offset_in_bits + len_in_bits;
+            let right_end = right_offset_in_bits + len_in_bits;
+
+            let left_aligned = left_offset_in_bits & !63;
+            let right_aligned = right_offset_in_bits & !63;
+
+            let left_end_bytes = (bit_util::ceil(left_end, 64) * 
8).min(left.len());
+            let right_end_bytes = (bit_util::ceil(right_end, 64) * 
8).min(right.len());
+
+            let left_slice = &left[left_aligned / 8..left_end_bytes];
+            let right_slice = &right[right_aligned / 8..right_end_bytes];
+
+            let (lp, left_u64s, ls) = unsafe { left_slice.align_to::<u64>() };
+            let (rp, right_u64s, rs) = unsafe { right_slice.align_to::<u64>() 
};
+
+            match (lp, ls, rp, rs) {
+                ([], [], [], []) => {
+                    let result_u64s: Vec<u64> = left_u64s
                         .iter()
                         .zip(right_u64s.iter())
                         .map(|(l, r)| op(*l, *r))
-                        .collect::<Vec<u64>>();
-                    return BooleanBuffer {
-                        buffer: Buffer::from(result_u64s),
-                        bit_offset: 0,
-                        bit_len: len_in_bits,
-                    };
+                        .collect();
+                    return BooleanBuffer::new(result_u64s.into(), bit_offset, 
len_in_bits);
                 }
+                ([], left_suf, [], right_suf) => {
+                    let left_iter = left_u64s
+                        .iter()
+                        .cloned()
+                        .chain((!left_suf.is_empty()).then(|| 
read_u64(left_suf)));
+                    let right_iter = right_u64s
+                        .iter()
+                        .cloned()
+                        .chain((!right_suf.is_empty()).then(|| 
read_u64(right_suf)));
+                    let result_u64s: Vec<u64> =
+                        left_iter.zip(right_iter).map(|(l, r)| op(l, 
r)).collect();
+                    return BooleanBuffer::new(result_u64s.into(), bit_offset, 
len_in_bits);
+                }
+                _ => {}
             }
+
+            // Memory not u64-aligned, use chunks_exact fallback
+            let left_chunks = left_slice.chunks_exact(8);
+            let left_rem = left_chunks.remainder();
+            let right_chunks = right_slice.chunks_exact(8);
+            let right_rem = right_chunks.remainder();
+
+            let left_iter = left_chunks.map(|c| 
u64::from_le_bytes(c.try_into().unwrap()));
+            let right_iter = right_chunks.map(|c| 
u64::from_le_bytes(c.try_into().unwrap()));
+
+            let result_u64s: Vec<u64> = if left_rem.is_empty() && 
right_rem.is_empty() {
+                left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect()
+            } else {
+                left_iter
+                    .chain(Some(read_u64(left_rem)))
+                    .zip(right_iter.chain(Some(read_u64(right_rem))))
+                    .map(|(l, r)| op(l, r))
+                    .collect()
+            };
+            return BooleanBuffer::new(result_u64s.into(), bit_offset, 
len_in_bits);
         }
+
+        // Different sub-64-bit alignments: bit-shifting unavoidable
         let left_chunks = BitChunks::new(left, left_offset_in_bits, 
len_in_bits);
         let right_chunks = BitChunks::new(right, right_offset_in_bits, 
len_in_bits);
 
@@ -479,7 +524,7 @@ impl BooleanBuffer {
         }
     }
 
-    /// Returns a [`Buffer`] containing the sliced contents of this 
[`BooleanBuffer`]
+    /// Returns a new [`Buffer`] containing the sliced contents of this 
[`BooleanBuffer`]
     ///
     /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
     pub fn sliced(&self) -> Buffer {
@@ -994,6 +1039,127 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback() {
+        // Exercise the shared-alignment fast path when both inputs are 
misaligned in memory,
+        // forcing the chunks_exact fallback instead of align_to::<u64>().
+        let left_bytes = [
+            0,           // dropped so `&left_bytes[1..]` is not u64-aligned 
in memory
+            0b1101_0010, // logical left bits start at bit 3 of this byte
+            0b0110_1101,
+            0b1010_0111,
+            0b0001_1110,
+            0b1110_0001,
+            0b0101_1010,
+            0b1001_0110,
+            0b0011_1100,
+            0b1011_0001,
+            0b0100_1110,
+            0b1100_0011,
+            0b0111_1000,
+        ];
+        let right_bytes = [
+            0,           // dropped so `&right_bytes[1..]` is not u64-aligned 
in memory
+            0b1010_1100, // logical right bits start at bit 67 == bit 3 of the 
second 64-bit block
+            0b0101_0011,
+            0b1111_0000,
+            0b0011_1010,
+            0b1000_1111,
+            0b0110_0101,
+            0b1101_1000,
+            0b0001_0111,
+            0b1110_0100,
+            0b0010_1101,
+            0b1001_1010,
+            0b0111_0001,
+        ];
+
+        let left = &left_bytes[1..];
+        let right = &right_bytes[1..];
+
+        let left_offset = 3;
+        let right_offset = 67; // same mod 64 as left_offset, so this takes 
the shared-alignment path
+        let len = 24; // leaves a partial trailing chunk, so this covers the 
non-empty remainder branch
+
+        let result = BooleanBuffer::from_bitwise_binary_op(
+            left,
+            left_offset,
+            right,
+            right_offset,
+            len,
+            |a, b| a & b,
+        );
+        let expected = (0..len)
+            .map(|i| {
+                bit_util::get_bit(left, left_offset + i)
+                    & bit_util::get_bit(right, right_offset + i)
+            })
+            .collect::<BooleanBuffer>();
+
+        assert_eq!(result, expected);
+        assert_eq!(result.offset(), left_offset % 64);
+    }
+
+    #[test]
+    fn 
test_from_bitwise_binary_op_same_mod_64_unaligned_fallback_no_remainder() {
+        // Force the chunks_exact fallback with an exact 8-byte chunk so both 
remainders are empty.
+        let left_bytes = [
+            0,           // dropped so `&left_bytes[1..]` is not u64-aligned 
in memory
+            0b1010_1100, // logical left bits start at bit 3 of this byte
+            0b0110_1001,
+            0b1101_0011,
+            0b0001_1110,
+            0b1110_0101,
+            0b0101_1000,
+            0b1001_0111,
+            0b0011_1101,
+        ];
+        let right_bytes = [
+            0,           // dropped so `&right_bytes[1..]` is not u64-aligned 
in memory
+            0b0111_0010, // logical right bits start at bit 67 == bit 3 of the 
second 64-bit block
+            0b1010_1001,
+            0b0101_1110,
+            0b1100_0011,
+            0b0011_1011,
+            0b1000_1110,
+            0b1111_0001,
+            0b0100_1101,
+            0b1011_0110,
+            0b0001_1011,
+            0b1101_0100,
+            0b0110_0011,
+            0b1001_1110,
+            0b0010_1001,
+            0b1110_0110,
+            0b0101_0001,
+        ];
+
+        let left = &left_bytes[1..];
+        let right = &right_bytes[1..];
+
+        let left_offset = 3;
+        let right_offset = 67; // same mod 64 as left_offset, so this takes 
the shared-alignment path
+        let len = 61; // 3 + 61 = 64, so the aligned slices are exactly one 
8-byte chunk with empty remainders
+
+        let result = BooleanBuffer::from_bitwise_binary_op(
+            left,
+            left_offset,
+            right,
+            right_offset,
+            len,
+            |a, b| a | b,
+        );
+        let expected = (0..len)
+            .map(|i| {
+                bit_util::get_bit(left, left_offset + i)
+                    | bit_util::get_bit(right, right_offset + i)
+            })
+            .collect::<BooleanBuffer>();
+
+        assert_eq!(result, expected);
+        assert_eq!(result.offset(), left_offset % 64);
+    }
+
     #[test]
     fn test_extend_trusted_len_sets_byte_len() {
         // Ensures extend_trusted_len keeps the underlying byte length in sync 
with bit length.
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index 36efe87643..793bbaf6c2 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -143,6 +143,9 @@ where
 
 /// Apply a bitwise and to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are 
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s 
directly
 pub fn buffer_bin_and(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -150,19 +153,27 @@ pub fn buffer_bin_and(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    BooleanBuffer::from_bitwise_binary_op(
+    let result = BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
         right_offset_in_bits,
         len_in_bits,
         |a, b| a & b,
-    )
-    .into_inner()
+    );
+    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+    if result.offset() == 0 {
+        result.into_inner()
+    } else {
+        result.sliced()
+    }
 }
 
 /// Apply a bitwise or to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are 
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s 
directly
 pub fn buffer_bin_or(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -170,19 +181,27 @@ pub fn buffer_bin_or(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    BooleanBuffer::from_bitwise_binary_op(
+    let result = BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
         right_offset_in_bits,
         len_in_bits,
         |a, b| a | b,
-    )
-    .into_inner()
+    );
+    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+    if result.offset() == 0 {
+        result.into_inner()
+    } else {
+        result.sliced()
+    }
 }
 
 /// Apply a bitwise xor to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are 
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s 
directly
 pub fn buffer_bin_xor(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -190,19 +209,27 @@ pub fn buffer_bin_xor(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    BooleanBuffer::from_bitwise_binary_op(
+    let result = BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
         right_offset_in_bits,
         len_in_bits,
         |a, b| a ^ b,
-    )
-    .into_inner()
+    );
+    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+    if result.offset() == 0 {
+        result.into_inner()
+    } else {
+        result.sliced()
+    }
 }
 
 /// Apply a bitwise and_not to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are 
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s 
directly
 pub fn buffer_bin_and_not(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -210,19 +237,70 @@ pub fn buffer_bin_and_not(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    BooleanBuffer::from_bitwise_binary_op(
+    let result = BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
         right_offset_in_bits,
         len_in_bits,
         |a, b| a & !b,
-    )
-    .into_inner()
+    );
+    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+    if result.offset() == 0 {
+        result.into_inner()
+    } else {
+        result.sliced()
+    }
 }
 
 /// Apply a bitwise not to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are 
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_unary_op`] for creating `BooleanBuffer`s 
directly
 pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: 
usize) -> Buffer {
     BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, 
|a| !a).into_inner()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_buffer_bin_ops_return_zero_offset_buffers() {
+        let left = Buffer::from(vec![0b1010_1100, 0b0110_1001]);
+        let right = Buffer::from(vec![0, 0, 0, 0, 0, 0, 0, 0, 0b1110_0101, 
0b0101_1000]);
+
+        let left_offset = 1;
+        let right_offset = 65; // same mod 64 as left_offset, so 
from_bitwise_binary_op returns non-zero offset
+        let len = 7;
+
+        // Reuse the same offset scenario for all four binary wrappers:
+        // each wrapper should return the logically equivalent offset-0 Buffer,
+        // even though the underlying BooleanBuffer result has offset 1.
+        for (op, wrapper) in [
+            (
+                (|a, b| a & b) as fn(u64, u64) -> u64,
+                buffer_bin_and as fn(&Buffer, usize, &Buffer, usize, usize) -> 
Buffer,
+            ),
+            (((|a, b| a | b) as fn(u64, u64) -> u64), buffer_bin_or),
+            (((|a, b| a ^ b) as fn(u64, u64) -> u64), buffer_bin_xor),
+            (((|a, b| a & !b) as fn(u64, u64) -> u64), buffer_bin_and_not),
+        ] {
+            let unsliced = BooleanBuffer::from_bitwise_binary_op(
+                &left,
+                left_offset,
+                &right,
+                right_offset,
+                len,
+                op,
+            );
+            assert_eq!(unsliced.offset(), 1);
+
+            let result = wrapper(&left, left_offset, &right, right_offset, 
len);
+
+            assert_eq!(result, unsliced.sliced());
+            assert_eq!(result.len(), 1);
+        }
+    }
+}

Reply via email to