vertexclique commented on a change in pull request #8571:
URL: https://github.com/apache/arrow/pull/8571#discussion_r516513055



##########
File path: rust/arrow/src/util/bit_chunk_iterator.rs
##########
@@ -214,10 +214,37 @@ mod tests {
 
         let result = bitchunks.into_iter().collect::<Vec<_>>();
 
-        //assert_eq!(vec![0b00010000, 0b00100000, 0b01000000, 0b10000000, 
0b00000000, 0b00000001, 0b00000010, 0b11110100], result);
         assert_eq!(
             
vec![0b1111010000000010000000010000000010000000010000000010000000010000],
             result
         );
     }
+
+    #[test]
+    fn test_iter_unaligned_remainder_bits_across_bytes() {
+        let input: &[u8] = &[0b00000000, 0b11111111];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(6, 7);

Review comment:
       What is 6, what is 7? Is 6 from left ? is 6 from right? Why there are 7 
bits in the assertion? Why there are 5 bits there?

##########
File path: rust/arrow/src/util/bit_chunk_iterator.rs
##########
@@ -72,22 +72,23 @@ impl<'a> BitChunks<'a> {
         if bit_len == 0 {
             0
         } else {
-            let byte_len = ceil(bit_len, 8);
-
-            let mut bits = 0;
-            for i in 0..byte_len {
-                let byte = unsafe {
-                    std::ptr::read(
-                        self.raw_data
-                            .add(self.chunk_len * std::mem::size_of::<u64>() + 
i),
-                    )
-                };
-                bits |= (byte as u64) << (i * 8);
-            }
+            let bit_offset = self.offset;
+            // number of bytes to read
+            // might be one more than sizeof(u64) if the offset is in the 
middle of a byte
+            let byte_len = ceil(bit_len + bit_offset, 8);
+            // pointer to remainder bytes after all complete chunks
+            let base = unsafe {
+                self.raw_data
+                    .add(self.chunk_len * std::mem::size_of::<u64>())

Review comment:
       Where chunk_len is created, chunk_bits is obviously: `8 * 
std::mem:size_of::<u64>()`
   Any reason to not write that?

##########
File path: rust/arrow/src/util/bit_chunk_iterator.rs
##########
@@ -214,10 +214,37 @@ mod tests {
 
         let result = bitchunks.into_iter().collect::<Vec<_>>();
 
-        //assert_eq!(vec![0b00010000, 0b00100000, 0b01000000, 0b10000000, 
0b00000000, 0b00000001, 0b00000010, 0b11110100], result);
         assert_eq!(
             
vec![0b1111010000000010000000010000000010000000010000000010000000010000],
             result
         );
     }
+
+    #[test]
+    fn test_iter_unaligned_remainder_bits_across_bytes() {
+        let input: &[u8] = &[0b00000000, 0b11111111];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(6, 7);
+
+        assert_eq!(7, bitchunks.remainder_len());

Review comment:
       None of these methods have documentation about what they do. e.g. 
remainder len in which form? bits? bytes?

##########
File path: rust/arrow/src/util/bit_chunk_iterator.rs
##########
@@ -214,10 +214,37 @@ mod tests {
 
         let result = bitchunks.into_iter().collect::<Vec<_>>();
 
-        //assert_eq!(vec![0b00010000, 0b00100000, 0b01000000, 0b10000000, 
0b00000000, 0b00000001, 0b00000010, 0b11110100], result);
         assert_eq!(
             
vec![0b1111010000000010000000010000000010000000010000000010000000010000],
             result
         );
     }
+
+    #[test]
+    fn test_iter_unaligned_remainder_bits_across_bytes() {
+        let input: &[u8] = &[0b00000000, 0b11111111];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(6, 7);
+
+        assert_eq!(7, bitchunks.remainder_len());
+        assert_eq!(0b1111100, bitchunks.remainder_bits());
+    }
+
+    #[test]
+    fn test_iter_unaligned_remainder_bits_large() {
+        let input: &[u8] = &[
+            0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 
0b00000000,
+            0b11111111, 0b00000000, 0b11111111,
+        ];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(2, 63);
+
+        assert_eq!(63, bitchunks.remainder_len());
+        assert_eq!(
+            
0b01000000_00111111_11000000_00111111_11000000_00111111_11000000_00111111,

Review comment:
       Why this forms these bit pattern? How a reader can be ensure that it is 
forming the correct pattern while reading?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to