alamb commented on code in PR #8996:
URL: https://github.com/apache/arrow-rs/pull/8996#discussion_r2620022082


##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -96,12 +96,126 @@ impl BooleanBuffer {
         Self::new(buffer.into(), 0, len)
     }
 
+    /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
+    /// input buffer.
+    ///
+    /// # Notes:
+    /// * The new `BooleanBuffer` has zero offset
+    ///
+    /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8];
+    /// // Copy bits 4..16 from input
+    /// let result = BooleanBuffer::from_bits(&input, 4, 12);
+    /// assert_eq!(result.values(), &[0b10101100u8, 0b00001011u8]);
+    /// ```
+    pub fn from_bits(left: impl AsRef<[u8]>, offset_in_bits: usize, 
len_in_bits: usize) -> Self {
+        Self::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| a)
+    }
+
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
+    /// to an input buffer.
+    ///
+    /// This function is faster than applying the operation bit by bit as
+    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+    ///
+    /// # Notes:
+    /// * `op` takes a single `u64` input and produces one `u64` output.
+    /// * `op` must only apply bitwise operations
+    ///   on the relevant bits; the input `u64` may contain irrelevant bits
+    ///   and may be processed differently on different endian architectures.
+    /// * The output always has zero offset
+    ///
+    /// # See Also
+    /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for 
in-place unary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input 
[`Buffer`]
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
+    /// // NOT of the first 12 bits
+    /// let result = BooleanBuffer::from_bitwise_unary_op(
+    ///  &input, 0, 12, |a| !a
+    /// );
+    /// assert_eq!(result.values(), &[0b00110011u8, 0b11110101u8]);
+    /// ```
+    pub fn from_bitwise_unary_op<F>(
+        left: impl AsRef<[u8]>,
+        offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64) -> u64,
+    {
+        // try fast path for aligned input
+        if offset_in_bits % 8 == 0 {
+            if let Some(result) = Self::try_from_aligned_bitwise_unary_op(
+                &left.as_ref()[offset_in_bits / 8..], // align to byte boundary
+                len_in_bits,
+                &mut op,
+            ) {
+                return result;
+            }
+        }
+
+        // each chunk is 64 bits
+        let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, 
len_in_bits);
+        let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 
8);
+        for left in left_chunks.iter() {
+            // SAFETY: we have reserved enough capacity above, and we are
+            // pushing exactly num_u64s() items and `BitChunks` correctly
+            // reports its upper bound
+            unsafe {
+                result.push_unchecked(op(left));
+            }
+        }
+        if left_chunks.remainder_len() > 0 {
+            debug_assert!(result.capacity() >= result.len() + 8); // should 
not reallocate
+            result.push(op(left_chunks.remainder_bits()));
+            // Just pushed one u64, which may have trailing zeros,
+            result.truncate(left_chunks.num_bytes());
+        }
+
+        BooleanBuffer {
+            buffer: Buffer::from(result),
+            offset: 0,
+            len: len_in_bits,
+        }
+    }
+
+    /// Like [`Self::from_bitwise_unary_op`] but optimized for the case where 
the
+    /// input is aligned to byte boundaries
+    fn try_from_aligned_bitwise_unary_op<F>(

Review Comment:
   BTW I wrote a version of this code that handles the byte-aligned case, but it 
actually seems to have made performance worse, so I am going to update the 
comments and leave it this way.
   
   <details>
   
   <summary>What I tried</summary>
   
   ```rust
   
       /// Like [`Self::from_bitwise_unary_op`] but optimized for the case 
where the
       /// input is aligned to byte boundaries
       fn try_from_aligned_bitwise_unary_op<F>(
           left: &[u8],
           len_in_bits: usize,
           op: &mut F,
       ) -> Option<Self>
       where
           F: FnMut(u64) -> u64,
       {
           // safety: all valid bytes are valid u64s
           let (left_prefix, left_u64s, left_suffix) = unsafe { 
left.align_to::<u64>() };
           // if there is no prefix or suffix, the buffer is aligned and we can 
do
           // the operation directly on u64s
           if left_prefix.is_empty() && left_suffix.is_empty() {
               let result_u64s: Vec<u64> = left_u64s.iter().map(|l| 
op(*l)).collect();
               let buffer = Buffer::from(result_u64s);
               return Some(BooleanBuffer::new(buffer, 0, len_in_bits));
           }
   
           let mut result = MutableBuffer::with_capacity(
               left_prefix.len() + left_u64s.len() * 8 + left_suffix.len(),
           );
           let prefix_u64 = op(Self::byte_slice_to_u64(left_prefix));
   
           
result.extend_from_slice(&prefix_u64.to_be_bytes()[0..left_prefix.len()]);
   
           assert!(result.capacity() >= result.len() + left_u64s.len() * 8);
           for &left in left_u64s.iter() {
               // SAFETY: we asserted there is enough capacity above
               unsafe {
                   result.push_unchecked(op(left));
               }
           }
   
           let suffix_u64 = op(Self::byte_slice_to_u64(left_suffix));
           
result.extend_from_slice(&suffix_u64.to_be_bytes()[0..left_suffix.len()]);
   
           Some(BooleanBuffer::new(result.into(), 0, len_in_bits))
       }
   ```
   
   
   ```diff
   diff --git a/arrow-buffer/src/buffer/boolean.rs 
b/arrow-buffer/src/buffer/boolean.rs
   index 97674c18843..285888b3a7c 100644
   --- a/arrow-buffer/src/buffer/boolean.rs
   +++ b/arrow-buffer/src/buffer/boolean.rs
   @@ -18,10 +18,11 @@
    use crate::bit_chunk_iterator::BitChunks;
    use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, 
BitIterator, BitSliceIterator};
    use crate::{
   -    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, 
buffer_bin_or,
   -    buffer_bin_xor, buffer_unary_not,
   +    BooleanBufferBuilder, Buffer, MutableBuffer, ToByteSlice, bit_util, 
buffer_bin_and,
   +    buffer_bin_or, buffer_bin_xor, buffer_unary_not,
    };
   
   +use crate::bit_util::get_remainder_bits;
    use std::ops::{BitAnd, BitOr, BitXor, Not};
   
    /// A slice-able [`Buffer`] containing bit-packed booleans
   @@ -200,14 +201,37 @@ impl BooleanBuffer {
            // the operation directly on u64s
            if left_prefix.is_empty() && left_suffix.is_empty() {
                let result_u64s: Vec<u64> = left_u64s.iter().map(|l| 
op(*l)).collect();
   -            Some(BooleanBuffer::new(
   -                Buffer::from(result_u64s),
   -                0,
   -                len_in_bits,
   -            ))
   -        } else {
   -            None
   +            let buffer = Buffer::from(result_u64s);
   +            return Some(BooleanBuffer::new(buffer, 0, len_in_bits));
            }
   +
   +        let mut result = MutableBuffer::with_capacity(
   +            left_prefix.len() + left_u64s.len() * 8 + left_suffix.len(),
   +        );
   +        let prefix_u64 = op(Self::byte_slice_to_u64(left_prefix));
   +
   +        
result.extend_from_slice(&prefix_u64.to_be_bytes()[0..left_prefix.len()]);
   +
   +        assert!(result.capacity() >= result.len() + left_u64s.len() * 8);
   +        for &left in left_u64s.iter() {
   +            // SAFETY: we asserted there is enough capacity above
   +            unsafe {
   +                result.push_unchecked(op(left));
   +            }
   +        }
   +
   +        let suffix_u64 = op(Self::byte_slice_to_u64(left_suffix));
   +        
result.extend_from_slice(&suffix_u64.to_be_bytes()[0..left_suffix.len()]);
   +
   +        Some(BooleanBuffer::new(result.into(), 0, len_in_bits))
   +    }
   +
   +    /// Convert the bytes into a u64 suitable for operation
   +    fn byte_slice_to_u64(src: &[u8]) -> u64 {
   +        let num_bytes = src.len();
   +        let mut bytes = [0u8; 8];
   +        bytes[0..num_bytes].copy_from_slice(src);
   +        u64::from_be_bytes(bytes)
        }
   
        /// Returns the number of set bits in this buffer
   diff --git a/arrow-buffer/src/util/bit_util.rs 
b/arrow-buffer/src/util/bit_util.rs
   ```
   
   </details>
   
   This PR
   ```
   not_slice_24            time:   [81.729 ns 82.091 ns 82.587 ns]
   ```
   
   When I tried fancier code for byte alignment:
   
   ```
   not_slice_24            time:   [121.13 ns 122.69 ns 124.52 ns]
   ```



##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -96,12 +96,126 @@ impl BooleanBuffer {
         Self::new(buffer.into(), 0, len)
     }
 
+    /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
+    /// input buffer.
+    ///
+    /// # Notes:
+    /// * The new `BooleanBuffer` has zero offset
+    ///
+    /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8];
+    /// // Copy bits 4..16 from input
+    /// let result = BooleanBuffer::from_bits(&input, 4, 12);
+    /// assert_eq!(result.values(), &[0b10101100u8, 0b00001011u8]);
+    /// ```
+    pub fn from_bits(left: impl AsRef<[u8]>, offset_in_bits: usize, 
len_in_bits: usize) -> Self {
+        Self::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| a)
+    }
+
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
+    /// to an input buffer.
+    ///
+    /// This function is faster than applying the operation bit by bit as
+    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+    ///
+    /// # Notes:
+    /// * `op` takes a single `u64` input and produces one `u64` output.
+    /// * `op` must only apply bitwise operations
+    ///   on the relevant bits; the input `u64` may contain irrelevant bits
+    ///   and may be processed differently on different endian architectures.
+    /// * The output always has zero offset
+    ///
+    /// # See Also
+    /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for 
in-place unary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input 
[`Buffer`]
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
+    /// // NOT of the first 12 bits
+    /// let result = BooleanBuffer::from_bitwise_unary_op(
+    ///  &input, 0, 12, |a| !a
+    /// );
+    /// assert_eq!(result.values(), &[0b00110011u8, 0b11110101u8]);
+    /// ```
+    pub fn from_bitwise_unary_op<F>(
+        left: impl AsRef<[u8]>,
+        offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64) -> u64,
+    {
+        // try fast path for aligned input
+        if offset_in_bits % 8 == 0 {
+            if let Some(result) = Self::try_from_aligned_bitwise_unary_op(
+                &left.as_ref()[offset_in_bits / 8..], // align to byte boundary
+                len_in_bits,
+                &mut op,
+            ) {
+                return result;
+            }
+        }
+
+        // each chunk is 64 bits
+        let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, 
len_in_bits);
+        let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 
8);
+        for left in left_chunks.iter() {
+            // SAFETY: we have reserved enough capacity above, and we are
+            // pushing exactly num_u64s() items and `BitChunks` correctly
+            // reports its upper bound
+            unsafe {
+                result.push_unchecked(op(left));
+            }
+        }
+        if left_chunks.remainder_len() > 0 {
+            debug_assert!(result.capacity() >= result.len() + 8); // should 
not reallocate
+            result.push(op(left_chunks.remainder_bits()));

Review Comment:
   Good idea -- done in 469f2ad1375



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to