jhorstmann commented on a change in pull request #8590: URL: https://github.com/apache/arrow/pull/8590#discussion_r519014044
########## File path: rust/arrow/src/array/data.rs ########## @@ -211,57 +211,66 @@ impl ArrayData { impl PartialEq for ArrayData { fn eq(&self, other: &Self) -> bool { - assert_eq!( - self.data_type(), - other.data_type(), - "Data types not the same" - ); - assert_eq!(self.len(), other.len(), "Lengths not the same"); - // TODO: when adding tests for this, test that we can compare with arrays that have offsets - assert_eq!(self.offset(), other.offset(), "Offsets not the same"); - assert_eq!(self.null_count(), other.null_count()); - // compare buffers excluding padding - let self_buffers = self.buffers(); - let other_buffers = other.buffers(); - assert_eq!(self_buffers.len(), other_buffers.len()); - self_buffers.iter().zip(other_buffers).for_each(|(s, o)| { - compare_buffer_regions( - s, - self.offset(), // TODO mul by data length - o, - other.offset(), // TODO mul by data len - ); - }); - // assert_eq!(self.buffers(), other.buffers()); - - assert_eq!(self.child_data(), other.child_data()); - // null arrays can skip the null bitmap, thus only compare if there are no nulls - if self.null_count() != 0 || other.null_count() != 0 { - compare_buffer_regions( + self.data_type() == other.data_type() + && self.len() == other.len() + // TODO: when adding tests for this, test that we can compare with arrays that have + // offsets + && self.offset() == other.offset() + && compare_buffers( + self.buffers(), + self.offset(), + other.buffers(), + other.offset(), + ) + && self.child_data() == other.child_data() + && self.null_count() == other.null_count() + // null arrays can skip the null bitmap, thus only compare if there are no nulls + // previous line would fail if null counts differed, so this check only needs to + // check one null_count to see if there are no nulls + && (self.null_count() == 0 || compare_buffer_regions( Review comment: `compare_buffer_regions` also needs an additional parameter for the array length to be correct, since the array could be a slice of some 
bigger underlying buffers. I missed the PR that introduced `compare_buffer_regions`; the way it is written, it will only work correctly for bit-packed buffers. That is correct for this call, but I found another usage where it is used for other buffers. It uses the `buffer.bit_slice` method, which is documented to take an offset in bits. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org