This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch alamb/consistent_arithmetic in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit 7a5ecbb6361f035b54eaa640d1784eb3c076cde4 Author: Andrew Lamb <[email protected]> AuthorDate: Tue May 5 10:24:07 2026 -0400 [arrow-array] use usize arithmetic in FixedSizeBinaryArray --- arrow-array/src/array/fixed_size_binary_array.rs | 128 ++++++++--------------- 1 file changed, 46 insertions(+), 82 deletions(-) diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index a54496addc..118d395c66 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -92,7 +92,7 @@ pub struct FixedSizeBinaryArray { value_data: Buffer, nulls: Option<NullBuffer>, len: usize, - value_length: i32, + value_size: usize, } impl FixedSizeBinaryArray { @@ -124,7 +124,6 @@ impl FixedSizeBinaryArray { /// * `value_length < 0` /// * `values.len() / value_length != nulls.len()` /// * `value_length == 0 && values.len() != 0` - /// * `len * value_length > i32::MAX` pub fn try_new( value_length: i32, values: Buffer, @@ -163,44 +162,15 @@ impl FixedSizeBinaryArray { } }; - Self::validate_lengths(value_size, len)?; - Ok(Self { data_type, value_data: values, - value_length, + value_size, nulls, len, }) } - /// Some calculations below use i32 arithmetic which can overflow when - /// valid offsets are past i32::MAX. Until that is solved for real do not - /// permit constructing any FixedSizeBinaryArray that has a valid offset - /// past i32::MAX - fn validate_lengths(value_size: usize, len: usize) -> Result<(), ArrowError> { - // the offset is also calculated for the next element (i + 1) so - // check `len` (not last element index) to ensure that all offsets are valid - let max_offset = value_size.checked_mul(len).ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "FixedSizeBinaryArray error: value size {value_size} * len {len} exceeds maximum valid offset" - )) - })?; - - let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| { - ArrowError::InvalidArgumentError(format!( - "FixedSizeBinaryArray error: maximum valid offset exceeds i32::MAX, got {max_offset}" - )) - })?; - - if max_offset > max_valid_offset { - return Err(ArrowError::InvalidArgumentError(format!( - "FixedSizeBinaryArray error: value size {value_size} * length {len} exceeds maximum valid offset of {max_valid_offset}" - ))); - }; - Ok(()) - } - /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null /// /// # Panics @@ -209,26 +179,28 @@ impl FixedSizeBinaryArray { /// /// * `value_length < 0` /// * `value_length * len` would overflow `usize` - /// * `value_length * len > i32::MAX` /// * `value_length * len * 8` would overflow `usize` pub fn new_null(value_length: i32, len: usize) -> Self { const BITS_IN_A_BYTE: usize = 8; let value_size = value_length.to_usize().unwrap(); - Self::validate_lengths(value_size, len).unwrap(); let capacity_in_bytes = value_size.checked_mul(len).unwrap(); let capacity_in_bits = capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap(); Self { data_type: DataType::FixedSizeBinary(value_length), value_data: MutableBuffer::new_null(capacity_in_bits).into(), nulls: Some(NullBuffer::new_null(len)), - value_length, + value_size, len, } } /// Deconstruct this array into its constituent parts pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) { - (self.value_length, self.value_data, self.nulls) + let value_length = self + .value_size + .try_into() + .expect("FixedSizeBinaryArray value length exceeds i32"); + (value_length, self.value_data, self.nulls) } /// Returns the element at index `i` as a byte slice. @@ -246,12 +218,9 @@ impl FixedSizeBinaryArray { self.len() ); let offset = i + self.offset(); + let position = self.value_position_at(offset); unsafe { - let pos = self.value_offset_at(offset); - std::slice::from_raw_parts( - self.value_data.as_ptr().offset(pos as isize), - (self.value_offset_at(offset + 1) - pos) as usize, - ) + std::slice::from_raw_parts(self.value_data.as_ptr().add(position), self.value_size) } } @@ -266,21 +235,32 @@ impl FixedSizeBinaryArray { /// of the array pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { let offset = i + self.offset(); - let pos = self.value_offset_at(offset); + let position = self.value_position_at(offset); unsafe { - std::slice::from_raw_parts( - self.value_data.as_ptr().offset(pos as isize), - (self.value_offset_at(offset + 1) - pos) as usize, - ) + std::slice::from_raw_parts(self.value_data.as_ptr().add(position), self.value_size) } } /// Returns the offset for the element at index `i`. /// /// Note this doesn't do any bound checking, for performance reason. + /// + /// # Panics + /// + /// Panics if the computed byte offset exceeds `i32::MAX`. #[inline] pub fn value_offset(&self, i: usize) -> i32 { - self.value_offset_at(self.offset() + i) + let offset = self + .offset() + .checked_add(i) + .expect("FixedSizeBinaryArray value offset index overflow"); + let position = self + .value_size + .checked_mul(offset) + .expect("FixedSizeBinaryArray value offset overflow"); + position + .try_into() + .expect("FixedSizeBinaryArray value offset exceeds i32") } /// Returns the length for an element. @@ -288,7 +268,9 @@ impl FixedSizeBinaryArray { /// All elements have the same length as the array is a fixed size. #[inline] pub fn value_length(&self) -> i32 { - self.value_length + self.value_size + .try_into() + .expect("FixedSizeBinaryArray value length exceeds i32") } /// Returns the values of this array. @@ -312,13 +294,13 @@ impl FixedSizeBinaryArray { "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length" ); - let size = self.value_length as usize; - Self { data_type: self.data_type.clone(), nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)), - value_length: self.value_length, - value_data: self.value_data.slice_with_length(offset * size, len * size), + value_size: self.value_size, + value_data: self + .value_data + .slice_with_length(self.value_position_at(offset), self.value_position_at(len)), len, } } @@ -420,7 +402,6 @@ impl FixedSizeBinaryArray { let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); let value_size = value_size.unwrap_or(0); - Self::validate_lengths(value_size, len)?; let value_length = value_size.try_into().map_err(|_| { ArrowError::InvalidArgumentError(format!( "FixedSizeBinaryArray value length exceeds i32, got {value_size}" @@ -430,7 +411,7 @@ impl FixedSizeBinaryArray { data_type: DataType::FixedSizeBinary(value_length), value_data: buffer.into(), nulls, - value_length, + value_size, len, }) } @@ -514,14 +495,13 @@ impl FixedSizeBinaryArray { })?; let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); - Self::validate_lengths(value_size, len)?; Ok(Self { data_type: DataType::FixedSizeBinary(value_length), value_data: buffer.into(), nulls, len, - value_length, + value_size, }) } @@ -583,7 +563,6 @@ impl FixedSizeBinaryArray { } let value_size = value_size.unwrap_or(0); - Self::validate_lengths(value_size, len)?; let value_length = value_size.try_into().map_err(|_| { ArrowError::InvalidArgumentError(format!( "FixedSizeBinaryArray value length exceeds i32, got {value_size}" @@ -593,14 +572,14 @@ impl FixedSizeBinaryArray { data_type: DataType::FixedSizeBinary(value_length), value_data: buffer.into(), nulls: None, - value_length, + value_size, len, }) } #[inline] - fn value_offset_at(&self, i: usize) -> i32 { - self.value_length * i as i32 + fn value_position_at(&self, i: usize) -> usize { + self.value_size * i } /// constructs a new iterator @@ -626,8 +605,6 @@ impl From<ArrayData> for FixedSizeBinaryArray { let value_size = value_length .to_usize() .expect("FixedSizeBinaryArray value length must be non-negative"); - Self::validate_lengths(value_size, len) - .expect("FixedSizeBinaryArray offsets must fit within i32"); let value_data = buffers[0].slice_with_length( offset.checked_mul(value_size).expect("offset overflow"), len.checked_mul(value_size).expect("length overflow"), @@ -638,7 +615,7 @@ impl From<ArrayData> for FixedSizeBinaryArray { nulls, len, value_data, - value_length, + value_size, } } } @@ -1017,7 +994,7 @@ mod tests { #[test] fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() { - let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>; + let input_arg: Vec<Option<Vec<u8>>> = vec![None, None, None, None, None]; let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16) .unwrap(); @@ -1086,7 +1063,7 @@ mod tests { #[test] fn fixed_size_binary_array_all_null() { - let data = vec![None] as Vec<Option<String>>; + let data: Vec<Option<String>> = vec![None]; let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap(); array @@ -1123,23 +1100,10 @@ mod tests { } #[test] - fn test_validate_lengths_allows_empty_array() { - FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap(); - } - - #[test] - fn test_validate_lengths_allows_i32_max_offset() { - FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap(); - FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap(); - } - - #[test] - fn test_validate_lengths_rejects_offset_past_i32_max() { - let err = FixedSizeBinaryArray::validate_lengths(262_177, 8192).unwrap_err(); - assert_eq!( - err.to_string(), - "Invalid argument error: FixedSizeBinaryArray error: value size 262177 * length 8192 exceeds maximum valid offset of 2147483647", - ); + #[should_panic(expected = "FixedSizeBinaryArray value offset exceeds i32")] + fn test_value_offset_panics_when_offset_exceeds_i32() { + let array = FixedSizeBinaryArray::new(1, Buffer::from_vec(vec![0_u8; 1]), None); + let _ = array.value_offset((i32::MAX as usize) + 1); } #[test]
