alamb commented on code in PR #6394:
URL: https://github.com/apache/arrow-rs/pull/6394#discussion_r1759590955
##########
arrow-buffer/src/util/bit_mask.rs:
##########
@@ -64,125 +64,235 @@ pub fn set_bits(
#[cfg(test)]
mod tests {
use super::*;
+ use crate::bit_util::unset_bit;
+ use rand::prelude::StdRng;
+ use rand::{Fill, Rng, SeedableRng};
+ use std::fmt::Display;
#[test]
fn test_set_bits_aligned() {
- let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
- let source: &[u8] = &[
- 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b10100101,
- ];
-
- let destination_offset = 8;
- let source_offset = 0;
-
- let len = 64;
-
- let expected_data: &[u8] = &[
- 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b10100101, 0,
- ];
- let expected_null_count = 24;
- let result = set_bits(
- destination.as_mut_slice(),
- source,
- destination_offset,
- source_offset,
- len,
- );
-
- assert_eq!(destination, expected_data);
- assert_eq!(result, expected_null_count);
+ SetBitsTest {
+ write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ data: vec![
+ 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
+ 0b10100101,
+ ],
+ offset_write: 8, // destination begins on a byte boundary (start of byte 1)
+ offset_read: 0,
+ len: 64, // copy all 8 source bytes
+ expected_data: vec![
+ 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011,
+ 0b11100111, 0b10100101, 0,
+ ],
+ expected_null_count: 24, // the 64 copied source bits contain 24 zeros
+ }
+ .verify();
}
#[test]
fn test_set_bits_unaligned_destination_start() {
- let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
- let source: &[u8] = &[
- 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b10100101,
- ];
-
- let destination_offset = 3;
- let source_offset = 0;
-
- let len = 64;
-
- let expected_data: &[u8] = &[
- 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110,
0b00011111, 0b00111110,
- 0b00101111, 0b00000101, 0b00000000,
- ];
- let expected_null_count = 24;
- let result = set_bits(
- destination.as_mut_slice(),
- source,
- destination_offset,
- source_offset,
- len,
- );
-
- assert_eq!(destination, expected_data);
- assert_eq!(result, expected_null_count);
+ SetBitsTest {
+ write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ data: vec![
+ 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
+ 0b10100101,
+ ],
+ offset_write: 3, // destination begins mid-byte, 3 bits into byte 0
+ offset_read: 0,
+ len: 64, // copy all 8 source bytes
+ expected_data: vec![
+ 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110,
0b00011111, 0b00111110,
+ 0b00101111, 0b00000101, 0b00000000,
+ ],
+ expected_null_count: 24, // same source bits as the aligned case: 24 zeros
+ }
+ .verify();
}
#[test]
fn test_set_bits_unaligned_destination_end() {
- let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
- let source: &[u8] = &[
- 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b10100101,
- ];
-
- let destination_offset = 8;
- let source_offset = 0;
-
- let len = 62;
-
- let expected_data: &[u8] = &[
- 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b00100101, 0,
- ];
- let expected_null_count = 23;
- let result = set_bits(
- destination.as_mut_slice(),
- source,
- destination_offset,
- source_offset,
- len,
- );
-
- assert_eq!(destination, expected_data);
- assert_eq!(result, expected_null_count);
+ SetBitsTest {
+ write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ data: vec![
+ 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
+ 0b10100101,
+ ],
+ offset_write: 8, // destination begins on a byte boundary
+ offset_read: 0,
+ len: 62, // write ends mid-byte: the top 2 bits of the last source byte are not copied
+ expected_data: vec![
+ 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011,
+ 0b11100111, 0b00100101, 0,
+ ],
+ expected_null_count: 23, // one of the two uncopied bits is a zero, so 24 - 1
+ }
+ .verify();
}
#[test]
fn test_set_bits_unaligned() {
- let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0];
- let source: &[u8] = &[
- 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
- 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101,
- 0b10011001, 0b11011011, 0b11101011, 0b11000011,
- ];
-
- let destination_offset = 3;
- let source_offset = 5;
-
- let len = 95;
-
- let expected_data: &[u8] = &[
- 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010,
0b11110000, 0b01111001,
- 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
0b00000001,
- ];
- let expected_null_count = 35;
- let result = set_bits(
- destination.as_mut_slice(),
- source,
- destination_offset,
- source_offset,
- len,
- );
-
- assert_eq!(destination, expected_data);
- assert_eq!(result, expected_null_count);
+ SetBitsTest {
+ write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ data: vec![
+ 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011,
0b11000011, 0b11100111,
+ 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101,
+ 0b10011001, 0b11011011, 0b11101011, 0b11000011,
+ ],
+ offset_write: 3, // destination begins mid-byte
+ offset_read: 5, // source also begins mid-byte
+ len: 95, // 3 + 95 = 98 bits, so the write also ends mid-byte
+ expected_data: vec![
+ 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010,
0b11110000, 0b01111001,
+ 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
0b00000001,
+ ],
+ expected_null_count: 35,
+ }
+ .verify();
+ }
+
+ #[test]
+ fn set_bits_fuz() {
+ // seed the generator with a constant instead of drawing entropy from the OS
+ let mut rng = StdRng::seed_from_u64(42);
+ let mut case = SetBitsTest::new();
+ // regenerate and check 10000 random cases, reusing a single instance
+ (0..10000).for_each(|_| {
+ case.regen(&mut rng);
+ case.verify();
+ });
+ }
+
+ #[derive(Debug, Default)]
+ struct SetBitsTest {
+ /// destination buffer that the test writes into
+ write_data: Vec<u8>,
+ /// source buffer that the bits are read from
+ data: Vec<u8>,
+ offset_write: usize, // bit offset into `write_data` where writing starts
+ offset_read: usize, // bit offset into `data` where reading starts
+ len: usize, // number of bits to copy
+ /// the expected contents of `write_data` after the test
+ expected_data: Vec<u8>,
+ /// the expected number of nulls copied (in the hand-written cases: the 0 bits within the copied range)
+ expected_null_count: usize,
+ }
+
+ /// displays a byte slice as 8-digit binary groups, each followed by a space, plus one final space, like "01010101 10101010  "
+ struct BinaryFormatter<'a>(&'a [u8]);
+ impl<'a> Display for BinaryFormatter<'a> {
+ /// Writes each byte as eight binary digits plus a space, then one final space.
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // stop at the first write error, exactly like `?` inside a loop would
+ self.0
+ .iter()
+ .try_for_each(|octet| write!(f, "{:08b} ", octet))?;
+ write!(f, " ")
+ }
+ }
+
+ impl Display for SetBitsTest {
+ /// Writes each field on its own line between `SetBitsTest {` and `}`,
+ /// rendering the three byte buffers in binary via `BinaryFormatter`.
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // literal braces must be doubled (`{{` / `}}`) inside a format string
+ writeln!(f, "SetBitsTest {{")?;
+ writeln!(f, " write_data: {}", BinaryFormatter(&self.write_data))?;
+ writeln!(f, " data: {}", BinaryFormatter(&self.data))?;
+ writeln!(
+ f,
+ " expected_data: {}",
+ BinaryFormatter(&self.expected_data)
+ )?;
+ writeln!(f, " offset_write: {}", self.offset_write)?;
+ writeln!(f, " offset_read: {}", self.offset_read)?;
+ writeln!(f, " len: {}", self.len)?;
+ writeln!(f, " expected_null_count: {}", self.expected_null_count)?;
+ // a lone `}` is an invalid format string; `}}` emits the closing brace
+ writeln!(f, "}}")
+ }
+ }
+
+ impl SetBitsTest {
+ /// create an empty `SetBitsTest`; every field starts at its `Default` value
+ fn new() -> Self {
+ Default::default()
+ }
+
+ /// Update this instance's fields with randomly selected values and
expected data
+ fn regen(&mut self, rng: &mut StdRng) {
+ // (read) data
+ // ------------------+-----------------+-------
+ // .. offset_read .. | data | ...
+ // ------------------+-----------------+-------
+
+ // Write data
+ // -------------------+-----------------+-------
+ // .. offset_write .. | (data to write) | ...
+ // -------------------+-----------------+-------
+
+ // length of data to copy
+ let len = rng.gen_range(0..=200);
+
+ // randomly pick where we will write to
+ let offset_write_bits = rng.gen_range(0..=200);
+ let offset_write_bytes = if offset_write_bits % 8 == 0 {
+ offset_write_bits / 8
+ } else {
+ (offset_write_bits / 8) + 1
+ };
+ let extra_write_data_bytes = rng.gen_range(0..=5); // ensure 0
shows up often
+
+ // randomly decide where we will read from
+ let extra_read_data_bytes = rng.gen_range(0..=5); // make sure 0
shows up often
+ let offset_read_bits = rng.gen_range(0..=200);
+ let offset_read_bytes = if offset_read_bits % 8 != 0 {
+ (offset_read_bits / 8) + 1
+ } else {
+ offset_read_bits / 8
+ };
+
+ // create space for writing
+ self.write_data.clear();
+ self.write_data
+ .resize(offset_write_bytes + len + extra_write_data_bytes, 0);
+
+ // interestingly set_bits seems to assume the output is already
zeroed
+ // the fuzz tests fail when this is uncommented
+ //self.write_data.try_fill(rng).unwrap();
+ self.offset_write = offset_write_bits;
+
+ // make source data
+ self.data
+ .resize(offset_read_bytes + len + extra_read_data_bytes, 0);
+ // fill source data with random bytes
+ self.data.try_fill(rng).unwrap();
+ self.offset_read = offset_read_bits;
+
+ self.len = len;
+
+ // generate expected output (not efficient)
+ self.expected_data.resize(self.write_data.len(), 0);
+ self.expected_data.copy_from_slice(&self.write_data);
+
+ self.expected_null_count = 0;
Review Comment:
The fuzz tester runs this code to compute the expected output (using
`set_bit` and `get_bit`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]