jhorstmann commented on code in PR #10172:
URL: https://github.com/apache/arrow-rs/pull/10172#discussion_r3454764929
##########
parquet/src/util/bit_util.rs:
##########
@@ -71,21 +81,82 @@ macro_rules! from_le_bytes {
<$ty>::from_le_bytes(bs)
}
}
- impl FromBitpacked for $ty {
- #[inline]
- fn from_u64(v: u64) -> Self {
- v as Self
- }
- }
)*
};
}
+macro_rules! from_bitpacked {
+ ($($ty: ty => $unpack: path),*) => {
+ $(
+ impl FromBitpacked for $ty {
+ const BIT_CAPACITY: usize = std::mem::size_of::<$ty>() * 8;
+ // this has to match the signature of the unpack* functions
+ const BATCH_SIZE: usize = std::mem::size_of::<$ty>() * 8;
+
+ #[inline]
+ fn from_u64(v: u64) -> Self {
+ v as _
+ }
+
+ #[inline]
+ fn unpack_batch(input: &[u8], output: &mut [Self], num_bits:
usize) {
+ $unpack(input, (&mut
output[..Self::BATCH_SIZE]).try_into().unwrap(), num_bits)
+ }
+ }
+ )*
+ }
+}
+
+macro_rules! from_bitpacked_delegate {
+ ($($ty: ty => $delegate: ty),*) => {
+ $(
+ impl FromBitpacked for $ty {
+ const BIT_CAPACITY: usize = <$delegate as
FromBitpacked>::BIT_CAPACITY;
+ const BATCH_SIZE: usize = <$delegate as
FromBitpacked>::BATCH_SIZE;
+
+ fn from_u64(v: u64) -> Self {
+ v as _
+ }
+
+ fn unpack_batch(input: &[u8], output: &mut [Self], num_bits:
usize) {
+ const {
+ assert!(std::mem::size_of::<$ty>() ==
std::mem::size_of::<$delegate>());
+ assert!(std::mem::align_of::<$ty>() ==
std::mem::align_of::<$delegate>());
+ }
+ // Safety: ty and delegate have the same size and
alignment, and this macro is only used for types that have transmutable bit
patterns.
+ let output: &mut [$delegate] = unsafe {
std::slice::from_raw_parts_mut(output.as_mut_ptr().cast::<$delegate>(),
output.len()) };
+ <$delegate>::unpack_batch(input, output, num_bits);
+ }
+ }
+ )*
+ }
+}
+
from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64 }
+from_bitpacked!(u8 => unpack8, u16 => unpack16, u32 => unpack32, u64 =>
unpack64);
+from_bitpacked_delegate!(i8 => u8 , i16 => u16, i32 => u32, i64 => u64);
+
+impl FromBitpacked for bool {
+ const BIT_CAPACITY: usize = 1;
+ const BATCH_SIZE: usize = <u8 as FromBitpacked>::BATCH_SIZE;
+
+ fn from_u64(v: u64) -> Self {
+ v != 0
+ }
-// SAFETY: all bit patterns are valid for f32 and f64.
-unsafe impl FromBytes for f32 {
- const BIT_CAPACITY: usize = 32;
+ fn unpack_batch(input: &[u8], output: &mut [Self], num_bits: usize) {
+ assert!(num_bits == 1);
Review Comment:
The compiler should hoist this outside of any loop, or even realize that for
boolean it will only get called with a bit width of 1.
I just searched for benchmarks for the boolean data type and did not find
any, so it might make sense to add a benchmark first.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]