nevi-me commented on a change in pull request #9093:
URL: https://github.com/apache/arrow/pull/9093#discussion_r551920615
##########
File path: rust/arrow/src/array/equal/utils.rs
##########
@@ -76,3 +80,185 @@ pub(super) fn equal_len(
) -> bool {
lhs_values[lhs_start..(lhs_start + len)] ==
rhs_values[rhs_start..(rhs_start + len)]
}
+
+/// Computes the logical validity bitmap of `child_data` by combining the child's
+/// own null bitmap with the validity of its parent (`parent_data`).
+/// A bitmap that is missing on either side is replaced by an all-ones bitmap (every slot valid).
+///
+/// `logical_null_buffer` is the parent's logical bitmap; it is passed separately because
+/// nested arrays could have a logical bitmap different to the physical one on the `ArrayData`.
+/// Panics unless the parent is List/LargeList/FixedSizeList/Struct (Union and Dictionary hit `unimplemented!`).
+pub(super) fn child_logical_null_buffer(
+ parent_data: &ArrayData,
+ logical_null_buffer: Option<Buffer>,
+ child_data: &ArrayData,
+) -> Option<Buffer> {
+ let parent_len = parent_data.len();
+ let parent_bitmap =
logical_null_buffer.map(Bitmap::from).unwrap_or_else(|| { // no logical bitmap supplied: fall back to all-valid
+ let ceil = bit_util::ceil(parent_len, 8); // number of bytes needed to hold parent_len bits
+ Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
+ });
+ let self_null_bitmap = child_data.null_bitmap().clone().unwrap_or_else(|| { // child has no null bitmap: treat every slot as valid
+ let ceil = bit_util::ceil(child_data.len(), 8); // number of bytes needed to hold child_data.len() bits
+ Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
+ });
+ match parent_data.data_type() {
+ DataType::List(_) => Some(logical_list_bitmap::<i32>( // <i32>: presumably the list offset width; helper is not shown here
+ parent_data,
+ parent_bitmap,
+ self_null_bitmap,
+ )),
+ DataType::LargeList(_) => Some(logical_list_bitmap::<i64>( // <i64>: presumably the large-list offset width; helper is not shown here
+ parent_data,
+ parent_bitmap,
+ self_null_bitmap,
+ )),
+ DataType::FixedSizeList(_, len) => { // each parent slot covers exactly `len` consecutive child slots
+ let len = *len as usize;
+ let array_offset = parent_data.offset();
+ let bitmap_len = bit_util::ceil(parent_len * len, 8); // output is sized for parent_len * len bits
+ let mut buffer =
+ MutableBuffer::new(bitmap_len).with_bitset(bitmap_len, false); // presumably starts with every bit cleared; confirm against MutableBuffer
+ let mut null_slice = buffer.as_slice_mut();
+ (array_offset..parent_len + array_offset).for_each(|index| { // NOTE(review): indices include the parent offset, but the buffer above is sized from parent_len only; confirm this is safe when offset > 0
+ let start = index * len;
+ let end = start + len;
+ let mask = parent_bitmap.is_set(index); // validity of the parent slot owning children start..end
+ (start..end).for_each(|child_index| {
+ if mask && self_null_bitmap.is_set(child_index) { // child is logically valid only if parent slot AND child slot are valid
+ bit_util::set_bit(&mut null_slice, child_index);
+ }
+ });
+ });
+ Some(buffer.into())
+ }
+ DataType::Struct(_) => {
+ // Arrow implementations are free to pad data, which can result in
null buffers not
+ // having the same length.
+ // Rust bitwise comparisons will return an error if left AND right
is performed on
+ // buffers of different length.
+ // This might be a valid case during integration testing, where we
read Arrow arrays
+ // from IPC data, which has padding.
+ //
+ // We first perform a bitwise comparison, and if there is an
error, we revert to a
+ // slower method that indexes into the buffers one-by-one.
+ let result = &parent_bitmap & &self_null_bitmap; // fast path: AND of the two bitmaps; yields a Result
+ if let Ok(bitmap) = result {
+ return Some(bitmap.bits); // NOTE(review): fast path returns the AND result as-is, while the slow path below reads at index + array_offset; confirm both are intended
+ }
+ // slow path: taken when the bitwise AND above returned Err
+ let array_offset = parent_data.offset();
+ let mut buffer = MutableBuffer::new_null(parent_len); // presumably parent_len cleared bits; confirm against MutableBuffer::new_null
+ let mut null_slice = buffer.as_slice_mut();
+ (0..parent_len).for_each(|index| {
+ if parent_bitmap.is_set(index + array_offset) // inputs are read at the offset-adjusted position
+ && self_null_bitmap.is_set(index + array_offset)
+ {
+ bit_util::set_bit(&mut null_slice, index); // output bit is written at the offset-free position
+ }
+ });
+ Some(buffer.into())
+ }
+ DataType::Union(_) => {
+ unimplemented!("Logical equality not yet implemented for union
arrays")
+ }
+ DataType::Dictionary(_, _) => {
+ unimplemented!("Logical equality not yet implemented for nested
dictionaries")
+ }
+ data_type => { // any other parent type is rejected with a panic
+ panic!("Data type {:?} is not a supported nested type", data_type)
+ }
+ }
+}
+
+// Calculate a list child's logical bitmap/buffer
Review comment:
I'm comfortable that I've captured the correct semantics of logical
equality for lists; that said, lists have been a thorn in my side for some time
now :(
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]