jhorstmann commented on code in PR #10172:
URL: https://github.com/apache/arrow-rs/pull/10172#discussion_r3454764929


##########
parquet/src/util/bit_util.rs:
##########
@@ -71,21 +81,82 @@ macro_rules! from_le_bytes {
                 <$ty>::from_le_bytes(bs)
             }
         }
-        impl FromBitpacked for $ty {
-            #[inline]
-            fn from_u64(v: u64) -> Self {
-                v as Self
-            }
-        }
         )*
     };
 }
 
+macro_rules! from_bitpacked {
+    ($($ty: ty => $unpack: path),*) => {
+        $(
+            impl FromBitpacked for $ty {
+                const BIT_CAPACITY: usize = std::mem::size_of::<$ty>() * 8;
+                // this has to match the signature of the unpack* functions
+                const BATCH_SIZE: usize = std::mem::size_of::<$ty>() * 8;
+
+                #[inline]
+                fn from_u64(v: u64) -> Self {
+                    v as _
+                }
+
+                #[inline]
+                fn unpack_batch(input: &[u8], output: &mut [Self], num_bits: 
usize) {
+                    $unpack(input, (&mut 
output[..Self::BATCH_SIZE]).try_into().unwrap(), num_bits)
+                }
+            }
+        )*
+    }
+}
+
+macro_rules! from_bitpacked_delegate {
+    ($($ty: ty => $delegate: ty),*) => {
+        $(
+            impl FromBitpacked for $ty {
+                const BIT_CAPACITY: usize = <$delegate as 
FromBitpacked>::BIT_CAPACITY;
+                const BATCH_SIZE: usize = <$delegate as 
FromBitpacked>::BATCH_SIZE;
+
+                fn from_u64(v: u64) -> Self {
+                    v as _
+                }
+
+                fn unpack_batch(input: &[u8], output: &mut [Self], num_bits: 
usize) {
+                    const {
+                        assert!(std::mem::size_of::<$ty>() == 
std::mem::size_of::<$delegate>());
+                        assert!(std::mem::align_of::<$ty>() == 
std::mem::align_of::<$delegate>());
+                    }
+                    // Safety: ty and delegate have the same size and 
alignment, and this macro is only used for types that have transmutable bit 
patterns.
+                    let output: &mut [$delegate] = unsafe { 
std::slice::from_raw_parts_mut(output.as_mut_ptr().cast::<$delegate>(), 
output.len()) };
+                    <$delegate>::unpack_batch(input, output, num_bits);
+                }
+            }
+        )*
+    }
+}
+
 from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64 }
+from_bitpacked!(u8 => unpack8, u16 => unpack16, u32 => unpack32, u64 => 
unpack64);
+from_bitpacked_delegate!(i8 => u8 , i16 => u16, i32 => u32, i64 => u64);
+
+impl FromBitpacked for bool {
+    const BIT_CAPACITY: usize = 1;
+    const BATCH_SIZE: usize = <u8 as FromBitpacked>::BATCH_SIZE;
+
+    fn from_u64(v: u64) -> Self {
+        v != 0
+    }
 
-// SAFETY: all bit patterns are valid for f32 and f64.
-unsafe impl FromBytes for f32 {
-    const BIT_CAPACITY: usize = 32;
+    fn unpack_batch(input: &[u8], output: &mut [Self], num_bits: usize) {
+        assert!(num_bits == 1);

Review Comment:
   The compiler should hoist this outside of any loop, or even realize that for 
boolean it will only get called with a bit width of 1.
   
   I just searched for benchmarks for the boolean data type and did not find 
any, so it might make sense to add a benchmark first.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to