alamb commented on a change in pull request #8541: URL: https://github.com/apache/arrow/pull/8541#discussion_r519169437
########## File path: rust/arrow/src/array/builder.rs ########## @@ -3535,14 +3531,15 @@ mod tests { None, Some(6), Some(7), + // array.data() end Some(3), None, None, Some(6), - ])) as ArrayRef; + ]); assert_eq!(finished.len(), expected.len()); assert_eq!(finished.null_count(), expected.null_count()); - assert!(finished.equals(&(*expected))); + assert_eq!(finished, expected); Review comment: this is certainly a lot nicer ########## File path: rust/arrow/src/array/builder.rs ########## @@ -531,7 +531,7 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> { for i in 0..len { // account for offset as `ArrayData` does not Review comment: this comment may need to be updated as the code seems to no longer account for offset ########## File path: rust/arrow/src/array/equal/mod.rs ########## @@ -0,0 +1,829 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Module containing functionality to compute array equality. +//! This module uses [ArrayData] and does not +//! depend on dynamic casting of `Array`. + +use super::{ + array::BinaryOffsetSizeTrait, Array, ArrayData, FixedSizeBinaryArray, + GenericBinaryArray, GenericListArray, GenericStringArray, OffsetSizeTrait, + PrimitiveArray, StringOffsetSizeTrait, StructArray, +}; + +use crate::datatypes::{ArrowPrimitiveType, DataType, IntervalUnit}; + +mod boolean; +mod dictionary; +mod fixed_binary; +mod fixed_list; +mod list; +mod null; +mod primitive; +mod structure; +mod utils; +mod variable_size; + +// these methods assume the same type, len and null count. +// For this reason, they are not exposed and are instead used +// to build the generic functions below (`equal_range` and `equal`). +use boolean::boolean_equal; +use dictionary::dictionary_equal; +use fixed_binary::fixed_binary_equal; +use fixed_list::fixed_list_equal; +use list::list_equal; +use null::null_equal; +use primitive::primitive_equal; +use structure::struct_equal; +use variable_size::variable_sized_equal; + +impl PartialEq for dyn Array { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl<T: Array> PartialEq<T> for dyn Array { + fn eq(&self, other: &T) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> { + fn eq(&self, other: &PrimitiveArray<T>) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl<OffsetSize: StringOffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl<OffsetSize: BinaryOffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl PartialEq for FixedSizeBinaryArray { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +impl PartialEq for StructArray { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + +#[inline] +fn equal_values( Review comment: ```suggestion /// Compares two arrays for equality starting at `lhs_start` and `rhs_start` /// `lhs` and `rhs` *must* have the same data type fn equal_values( ``` ########## File path: rust/arrow/src/array/data.rs ########## @@ -207,6 +206,41 @@ impl ArrayData { size } + + /// Creates a zero-copy slice of itself. This creates a new [ArrayData] + /// with a different offset, len and a shifted null bitmap. + /// + /// # Panics + /// + /// Panics if `offset + length > self.len()`. + pub fn slice(&self, offset: usize, length: usize) -> ArrayData { + assert!((offset + length) <= self.len()); + + let mut new_data = self.clone(); + + new_data.len = length; + new_data.offset = offset + self.offset; + + new_data.null_count = + count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); + + new_data + } + + /// Returns the buffer `buffer` as a slice of type `T`. The slice is already offset. Review comment: ```suggestion /// Returns self.buffers at index `buffer` as a slice of type `T` starting at self.offset ``` ########## File path: rust/arrow/src/array/data.rs ########## @@ -207,6 +206,41 @@ impl ArrayData { size } + + /// Creates a zero-copy slice of itself. This creates a new [ArrayData] + /// with a different offset, len and a shifted null bitmap. + /// + /// # Panics + /// + /// Panics if `offset + length > self.len()`. + pub fn slice(&self, offset: usize, length: usize) -> ArrayData { + assert!((offset + length) <= self.len()); + + let mut new_data = self.clone(); + + new_data.len = length; + new_data.offset = offset + self.offset; + + new_data.null_count = + count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); + + new_data + } + + /// Returns the buffer `buffer` as a slice of type `T`. The slice is already offset. + /// # Panics + /// This function panics if: + /// * the buffer is not byte-aligned with type T, or + /// * the datatype is `Boolean` (it corresponds to a bit-packed buffer where the offset is not applicable) + #[inline] + pub(super) fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] { + let values = unsafe { self.buffers[buffer].data().align_to::<T>() }; + if values.0.len() != 0 || values.2.len() != 0 { + panic!("The buffer is not byte-aligned with its interpretation") Review comment: 👍 for the panic ########## File path: rust/arrow/src/array/builder.rs ########## @@ -3999,7 +3996,7 @@ mod tests { let expected_list = FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayDataRef); let expected_list = FixedSizeBinaryArray::from(expected_list); - // assert!(expected_list.values().equals(&*finished.values())); + // assert_eq!(expected_list.values(), finished.values()); Review comment: not that you did it, but I wonder why this code is commented out -- maybe your changes will have fixed it ########## File path: rust/arrow/src/array/array.rs ########## @@ -112,10 +112,10 @@ pub trait Array: fmt::Debug + Send + Sync + ArrayEqual + JsonEqual { /// // Make slice over the values [2, 3, 4] /// let array_slice = array.slice(1, 3); /// - /// assert!(array_slice.equals(&Int32Array::from(vec![2, 3, 4]))); + /// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4])); Review comment: :heart ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org