alamb commented on code in PR #6288:
URL: https://github.com/apache/arrow-rs/pull/6288#discussion_r1758949806


##########
arrow-buffer/src/util/bit_mask.rs:
##########
@@ -30,35 +29,128 @@ pub fn set_bits(
     offset_read: usize,
     len: usize,
 ) -> usize {
+    assert!(offset_write + len <= write_data.len() * 8);
+    assert!(offset_read + len <= data.len() * 8);
     let mut null_count = 0;
-
-    let mut bits_to_align = offset_write % 8;
-    if bits_to_align > 0 {
-        bits_to_align = std::cmp::min(len, 8 - bits_to_align);
+    let mut acc = 0;
+    while len > acc {
+        // SAFETY: the arguments to `set_upto_64bits` are within the valid 
range because
+        // (offset_write + acc) + (len - acc) == offset_write + len <= 
write_data.len() * 8
+        // (offset_read + acc) + (len - acc) == offset_read + len <= 
data.len() * 8
+        let (n, len_set) = unsafe {
+            set_upto_64bits(
+                write_data,
+                data,
+                offset_write + acc,
+                offset_read + acc,
+                len - acc,
+            )
+        };
+        null_count += n;
+        acc += len_set;
     }
-    let mut write_byte_index = ceil(offset_write + bits_to_align, 8);
-
-    // Set full bytes provided by bit chunk iterator (which iterates in 64 
bits at a time)
-    let chunks = BitChunks::new(data, offset_read + bits_to_align, len - 
bits_to_align);
-    chunks.iter().for_each(|chunk| {
-        null_count += chunk.count_zeros();
-        write_data[write_byte_index..write_byte_index + 
8].copy_from_slice(&chunk.to_le_bytes());
-        write_byte_index += 8;
-    });
-
-    // Set individual bits both to align write_data to a byte offset and the 
remainder bits not covered by the bit chunk iterator
-    let remainder_offset = len - chunks.remainder_len();
-    (0..bits_to_align)
-        .chain(remainder_offset..len)
-        .for_each(|i| {
-            if get_bit(data, offset_read + i) {
-                set_bit(write_data, offset_write + i);
+
+    null_count
+}
+
+/// # Safety

Review Comment:
   Could you please also update this documentation with a description of what 
both the arguments and return value represent  (if it is the same as `set_bits` 
a reference to that function would be fine)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to