alamb commented on code in PR #8996:
URL: https://github.com/apache/arrow-rs/pull/8996#discussion_r2620022082
##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -96,12 +96,126 @@ impl BooleanBuffer {
Self::new(buffer.into(), 0, len)
}
+ /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
+ /// input buffer.
+ ///
+ /// # Notes:
+ /// * The new `BooleanBuffer` has zero offset
+ ///
+ /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice
+ /// ```
+ /// # use arrow_buffer::BooleanBuffer;
+ /// let input = [0b11001100u8, 0b10111010u8];
+ /// // Copy bits 4..16 from input
+ /// let result = BooleanBuffer::from_bits(&input, 4, 12);
+ /// assert_eq!(result.values(), &[0b10101100u8, 0b00001011u8]);
+ /// ```
+ pub fn from_bits(left: impl AsRef<[u8]>, offset_in_bits: usize, len_in_bits: usize) -> Self {
+ Self::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| a)
+ }
+
+ /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
+ /// to an input buffer.
+ ///
+ /// This function is faster than applying the operation bit by bit, as
+ /// it processes input buffers in chunks of 64 bits (8 bytes) at a time.
+ ///
+ /// # Notes:
+ /// * `op` takes a single `u64` input and produces one `u64` output.
+ /// * `op` must apply only bitwise operations, as the input `u64` may
+ /// contain irrelevant bits and may be processed differently on
+ /// different endian architectures.
+ /// * The output always has zero offset
+ ///
+ /// # See Also
+ /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
+ ///
+ /// # Example: Create a new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`]
+ /// ```
+ /// # use arrow_buffer::BooleanBuffer;
+ /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
+ /// // NOT of the first 12 bits
+ /// let result = BooleanBuffer::from_bitwise_unary_op(
+ /// &input, 0, 12, |a| !a
+ /// );
+ /// assert_eq!(result.values(), &[0b00110011u8, 0b11110101u8]);
+ /// ```
+ pub fn from_bitwise_unary_op<F>(
+ left: impl AsRef<[u8]>,
+ offset_in_bits: usize,
+ len_in_bits: usize,
+ mut op: F,
+ ) -> Self
+ where
+ F: FnMut(u64) -> u64,
+ {
+ // try fast path for aligned input
+ if offset_in_bits % 8 == 0 {
+ if let Some(result) = Self::try_from_aligned_bitwise_unary_op(
+ &left.as_ref()[offset_in_bits / 8..], // align to byte boundary
+ len_in_bits,
+ &mut op,
+ ) {
+ return result;
+ }
+ }
+
+ // each chunk is 64 bits
+ let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, len_in_bits);
+ let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 8);
+ for left in left_chunks.iter() {
+ // SAFETY: we have reserved enough capacity above, and we are
+ // pushing exactly num_u64s() items and `BitChunks` correctly
+ // reports its upper bound
+ unsafe {
+ result.push_unchecked(op(left));
+ }
+ }
+ if left_chunks.remainder_len() > 0 {
+ debug_assert!(result.capacity() >= result.len() + 8); // should not reallocate
+ result.push(op(left_chunks.remainder_bits()));
+ // Just pushed one u64, which may have trailing zeros, so truncate
+ result.truncate(left_chunks.num_bytes());
+ }
+
+ BooleanBuffer {
+ buffer: Buffer::from(result),
+ offset: 0,
+ len: len_in_bits,
+ }
+ }
+
+ /// Like [`Self::from_bitwise_unary_op`] but optimized for the case where the
+ /// input is aligned to byte boundaries
+ fn try_from_aligned_bitwise_unary_op<F>(
Review Comment:
BTW I wrote a version of this code that handles the byte-aligned case, but it
actually seems to have made performance worse, so I am going to update the
comments and leave it this way
<details>
<summary>What I tried</summary>
```rust
    /// Like [`Self::from_bitwise_unary_op`] but optimized for the case where the
    /// input is aligned to byte boundaries
    fn try_from_aligned_bitwise_unary_op<F>(
        left: &[u8],
        len_in_bits: usize,
        op: &mut F,
    ) -> Option<Self>
    where
        F: FnMut(u64) -> u64,
    {
        // safety: all valid bytes are valid u64s
        let (left_prefix, left_u64s, left_suffix) = unsafe { left.align_to::<u64>() };
        // if there is no prefix or suffix, the buffer is aligned and we can do
        // the operation directly on u64s
        if left_prefix.is_empty() && left_suffix.is_empty() {
            let result_u64s: Vec<u64> = left_u64s.iter().map(|l| op(*l)).collect();
            let buffer = Buffer::from(result_u64s);
            return Some(BooleanBuffer::new(buffer, 0, len_in_bits));
        }
        let mut result = MutableBuffer::with_capacity(
            left_prefix.len() + left_u64s.len() * 8 + left_suffix.len(),
        );
        let prefix_u64 = op(Self::byte_slice_to_u64(left_prefix));
        result.extend_from_slice(&prefix_u64.to_be_bytes()[0..left_prefix.len()]);
        assert!(result.capacity() >= result.len() + left_u64s.len() * 8);
        for &left in left_u64s.iter() {
            // SAFETY: we asserted there is enough capacity above
            unsafe {
                result.push_unchecked(op(left));
            }
        }
        let suffix_u64 = op(Self::byte_slice_to_u64(left_suffix));
        result.extend_from_slice(&suffix_u64.to_be_bytes()[0..left_suffix.len()]);
        Some(BooleanBuffer::new(result.into(), 0, len_in_bits))
    }
```
```diff
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index 97674c18843..285888b3a7c 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -18,10 +18,11 @@
 use crate::bit_chunk_iterator::BitChunks;
 use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
 use crate::{
-    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
-    buffer_bin_xor, buffer_unary_not,
+    BooleanBufferBuilder, Buffer, MutableBuffer, ToByteSlice, bit_util, buffer_bin_and,
+    buffer_bin_or, buffer_bin_xor, buffer_unary_not,
 };
+use crate::bit_util::get_remainder_bits;
 use std::ops::{BitAnd, BitOr, BitXor, Not};

 /// A slice-able [`Buffer`] containing bit-packed booleans
@@ -200,14 +201,37 @@ impl BooleanBuffer {
         // the operation directly on u64s
         if left_prefix.is_empty() && left_suffix.is_empty() {
             let result_u64s: Vec<u64> = left_u64s.iter().map(|l| op(*l)).collect();
-            Some(BooleanBuffer::new(
-                Buffer::from(result_u64s),
-                0,
-                len_in_bits,
-            ))
-        } else {
-            None
+            let buffer = Buffer::from(result_u64s);
+            return Some(BooleanBuffer::new(buffer, 0, len_in_bits));
         }
+
+        let mut result = MutableBuffer::with_capacity(
+            left_prefix.len() + left_u64s.len() * 8 + left_suffix.len(),
+        );
+        let prefix_u64 = op(Self::byte_slice_to_u64(left_prefix));
+
+        result.extend_from_slice(&prefix_u64.to_be_bytes()[0..left_prefix.len()]);
+
+        assert!(result.capacity() >= result.len() + left_u64s.len() * 8);
+        for &left in left_u64s.iter() {
+            // SAFETY: we asserted there is enough capacity above
+            unsafe {
+                result.push_unchecked(op(left));
+            }
+        }
+
+        let suffix_u64 = op(Self::byte_slice_to_u64(left_suffix));
+
+        result.extend_from_slice(&suffix_u64.to_be_bytes()[0..left_suffix.len()]);
+
+        Some(BooleanBuffer::new(result.into(), 0, len_in_bits))
+    }
+
+    /// convert the bytes into a u64 suitable for operation
+    fn byte_slice_to_u64(src: &[u8]) -> u64 {
+        let num_bytes = src.len();
+        let mut bytes = [0u8; 8];
+        bytes[0..num_bytes].copy_from_slice(src);
+        u64::from_be_bytes(bytes)
     }

     /// Returns the number of set bits in this buffer
diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs
```
</details>
This PR
```
not_slice_24 time: [81.729 ns 82.091 ns 82.587 ns]
```
When I tried fancier code for byte alignment:
```
not_slice_24 time: [121.13 ns 122.69 ns 124.52 ns]
```
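For context, here is a rough sketch of what a criterion benchmark along these lines could look like. The input size, offset, and harness wiring are illustrative guesses, not the actual `not_slice_24` benchmark in the repo:

```rust
use arrow_buffer::BooleanBuffer;
use criterion::{Criterion, criterion_group, criterion_main};
use std::hint::black_box;

fn bench_not_slice(c: &mut Criterion) {
    // 128 bytes = 1024 bits of input
    let input = vec![0b1011_0010u8; 128];
    c.bench_function("not_slice_24", |b| {
        // NOT of 1000 bits starting at the byte-aligned bit offset 24
        b.iter(|| BooleanBuffer::from_bitwise_unary_op(black_box(&input), 24, 1000, |a| !a))
    });
}

criterion_group!(benches, bench_not_slice);
criterion_main!(benches);
```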
##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -96,12 +96,126 @@ impl BooleanBuffer {
Self::new(buffer.into(), 0, len)
}
+ /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
+ /// input buffer.
+ ///
+ /// # Notes:
+ /// * The new `BooleanBuffer` has zero offset
+ ///
+ /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice
+ /// ```
+ /// # use arrow_buffer::BooleanBuffer;
+ /// let input = [0b11001100u8, 0b10111010u8];
+ /// // Copy bits 4..16 from input
+ /// let result = BooleanBuffer::from_bits(&input, 4, 12);
+ /// assert_eq!(result.values(), &[0b10101100u8, 0b00001011u8]);
+ /// ```
+ pub fn from_bits(left: impl AsRef<[u8]>, offset_in_bits: usize, len_in_bits: usize) -> Self {
+ Self::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| a)
+ }
+
+ /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
+ /// to an input buffer.
+ ///
+ /// This function is faster than applying the operation bit by bit, as
+ /// it processes input buffers in chunks of 64 bits (8 bytes) at a time.
+ ///
+ /// # Notes:
+ /// * `op` takes a single `u64` input and produces one `u64` output.
+ /// * `op` must apply only bitwise operations, as the input `u64` may
+ /// contain irrelevant bits and may be processed differently on
+ /// different endian architectures.
+ /// * The output always has zero offset
+ ///
+ /// # See Also
+ /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
+ ///
+ /// # Example: Create a new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`]
+ /// ```
+ /// # use arrow_buffer::BooleanBuffer;
+ /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
+ /// // NOT of the first 12 bits
+ /// let result = BooleanBuffer::from_bitwise_unary_op(
+ /// &input, 0, 12, |a| !a
+ /// );
+ /// assert_eq!(result.values(), &[0b00110011u8, 0b11110101u8]);
+ /// ```
+ pub fn from_bitwise_unary_op<F>(
+ left: impl AsRef<[u8]>,
+ offset_in_bits: usize,
+ len_in_bits: usize,
+ mut op: F,
+ ) -> Self
+ where
+ F: FnMut(u64) -> u64,
+ {
+ // try fast path for aligned input
+ if offset_in_bits % 8 == 0 {
+ if let Some(result) = Self::try_from_aligned_bitwise_unary_op(
+ &left.as_ref()[offset_in_bits / 8..], // align to byte boundary
+ len_in_bits,
+ &mut op,
+ ) {
+ return result;
+ }
+ }
+
+ // each chunk is 64 bits
+ let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, len_in_bits);
+ let mut result = MutableBuffer::with_capacity(left_chunks.num_u64s() * 8);
+ for left in left_chunks.iter() {
+ // SAFETY: we have reserved enough capacity above, and we are
+ // pushing exactly num_u64s() items and `BitChunks` correctly
+ // reports its upper bound
+ unsafe {
+ result.push_unchecked(op(left));
+ }
+ }
+ if left_chunks.remainder_len() > 0 {
+ debug_assert!(result.capacity() >= result.len() + 8); // should not reallocate
+ result.push(op(left_chunks.remainder_bits()));
Review Comment:
Good idea -- done in 469f2ad1375
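
As an aside, here is a small usage sketch (assuming the `from_bitwise_unary_op` API from the hunk above) that exercises both the 64-bit chunk loop and the remainder push by using a non-byte-aligned offset, checked against a bit-by-bit reference:

```rust
use arrow_buffer::BooleanBuffer;
use arrow_buffer::bit_util::get_bit;

// 17 bytes = 136 bits of input with an arbitrary pattern
let input: Vec<u8> = (0..17u8).map(|i| i.wrapping_mul(37)).collect();

// NOT of 70 bits starting at bit offset 3 (not byte aligned):
// one full 64-bit chunk plus a 6-bit remainder
let result = BooleanBuffer::from_bitwise_unary_op(&input, 3, 70, |a| !a);
assert_eq!(result.len(), 70);

// Compare every bit against a naive per-bit computation
for i in 0..70 {
    assert_eq!(result.value(i), !get_bit(&input, 3 + i));
}
```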