nevi-me commented on a change in pull request #9093:
URL: https://github.com/apache/arrow/pull/9093#discussion_r551920615



##########
File path: rust/arrow/src/array/equal/utils.rs
##########
@@ -76,3 +80,185 @@ pub(super) fn equal_len(
 ) -> bool {
     lhs_values[lhs_start..(lhs_start + len)] == 
rhs_values[rhs_start..(rhs_start + len)]
 }
+
+/// Computes the logical validity bitmap of the array data using the
+/// parent's array data. The parent should be a list or struct, else
+/// the logical bitmap of the array is returned unaltered.
+///
+/// Parent data is passed along with the parent's logical bitmap, as
+/// nested arrays could have a logical bitmap different to the physical
+/// one on the `ArrayData`.
+pub(super) fn child_logical_null_buffer(
+    parent_data: &ArrayData,
+    logical_null_buffer: Option<Buffer>,
+    child_data: &ArrayData,
+) -> Option<Buffer> {
+    let parent_len = parent_data.len();
+    let parent_bitmap = 
logical_null_buffer.map(Bitmap::from).unwrap_or_else(|| {
+        let ceil = bit_util::ceil(parent_len, 8);
+        Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
+    });
+    let self_null_bitmap = child_data.null_bitmap().clone().unwrap_or_else(|| {
+        let ceil = bit_util::ceil(child_data.len(), 8);
+        Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
+    });
+    match parent_data.data_type() {
+        DataType::List(_) => Some(logical_list_bitmap::<i32>(
+            parent_data,
+            parent_bitmap,
+            self_null_bitmap,
+        )),
+        DataType::LargeList(_) => Some(logical_list_bitmap::<i64>(
+            parent_data,
+            parent_bitmap,
+            self_null_bitmap,
+        )),
+        DataType::FixedSizeList(_, len) => {
+            let len = *len as usize;
+            let array_offset = parent_data.offset();
+            let bitmap_len = bit_util::ceil(parent_len * len, 8);
+            let mut buffer =
+                MutableBuffer::new(bitmap_len).with_bitset(bitmap_len, false);
+            let mut null_slice = buffer.as_slice_mut();
+            (array_offset..parent_len + array_offset).for_each(|index| {
+                let start = index * len;
+                let end = start + len;
+                let mask = parent_bitmap.is_set(index);
+                (start..end).for_each(|child_index| {
+                    if mask && self_null_bitmap.is_set(child_index) {
+                        bit_util::set_bit(&mut null_slice, child_index);
+                    }
+                });
+            });
+            Some(buffer.into())
+        }
+        DataType::Struct(_) => {
+            // Arrow implementations are free to pad data, which can result in 
null buffers not
+            // having the same length.
+            // Rust bitwise comparisons will return an error if left AND right 
is performed on
+            // buffers of different length.
+            // This might be a valid case during integration testing, where we 
read Arrow arrays
+            // from IPC data, which has padding.
+            //
+            // We first perform a bitwise comparison, and if there is an 
error, we revert to a
+            // slower method that indexes into the buffers one-by-one.
+            let result = &parent_bitmap & &self_null_bitmap;
+            if let Ok(bitmap) = result {
+                return Some(bitmap.bits);
+            }
+            // slow path
+            let array_offset = parent_data.offset();
+            let mut buffer = MutableBuffer::new_null(parent_len);
+            let mut null_slice = buffer.as_slice_mut();
+            (0..parent_len).for_each(|index| {
+                if parent_bitmap.is_set(index + array_offset)
+                    && self_null_bitmap.is_set(index + array_offset)
+                {
+                    bit_util::set_bit(&mut null_slice, index);
+                }
+            });
+            Some(buffer.into())
+        }
+        DataType::Union(_) => {
+            unimplemented!("Logical equality not yet implemented for union 
arrays")
+        }
+        DataType::Dictionary(_, _) => {
+            unimplemented!("Logical equality not yet implemented for nested 
dictionaries")
+        }
+        data_type => {
+            panic!("Data type {:?} is not a supported nested type", data_type)
+        }
+    }
+}
+
+// Calculate a list child's logical bitmap/buffer

Review comment:
       I'm comfortable that I've captured the correct semantics of logical 
equality for lists; that said, lists have been a thorn in my side for some time 
now :(




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to