This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch alamb/consistent_arithmetic
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 7a5ecbb6361f035b54eaa640d1784eb3c076cde4
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue May 5 10:24:07 2026 -0400

    [arrow-array] use usize arithmetic in FixedSizeBinaryArray
---
 arrow-array/src/array/fixed_size_binary_array.rs | 128 ++++++++---------------
 1 file changed, 46 insertions(+), 82 deletions(-)

diff --git a/arrow-array/src/array/fixed_size_binary_array.rs 
b/arrow-array/src/array/fixed_size_binary_array.rs
index a54496addc..118d395c66 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -92,7 +92,7 @@ pub struct FixedSizeBinaryArray {
     value_data: Buffer,
     nulls: Option<NullBuffer>,
     len: usize,
-    value_length: i32,
+    value_size: usize,
 }
 
 impl FixedSizeBinaryArray {
@@ -124,7 +124,6 @@ impl FixedSizeBinaryArray {
     /// * `value_length < 0`
     /// * `values.len() / value_length != nulls.len()`
     /// * `value_length == 0 && values.len() != 0`
-    /// * `len * value_length > i32::MAX`
     pub fn try_new(
         value_length: i32,
         values: Buffer,
@@ -163,44 +162,15 @@ impl FixedSizeBinaryArray {
             }
         };
 
-        Self::validate_lengths(value_size, len)?;
-
         Ok(Self {
             data_type,
             value_data: values,
-            value_length,
+            value_size,
             nulls,
             len,
         })
     }
 
-    /// Some calculations below use i32 arithmetic which can overflow when
-    /// valid offsets are past i32::MAX. Until that is solved for real do not
-    /// permit constructing any FixedSizeBinaryArray that has a valid offset
-    /// past i32::MAX
-    fn validate_lengths(value_size: usize, len: usize) -> Result<(), 
ArrowError> {
-        // the offset is also calculated for the next element (i + 1) so
-        // check `len` (not last element index) to ensure that all offsets are 
valid
-        let max_offset = value_size.checked_mul(len).ok_or_else(|| {
-            ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinaryArray error: value size {value_size} * len 
{len} exceeds maximum valid offset"
-            ))
-        })?;
-
-        let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
-            ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinaryArray error: maximum valid offset exceeds 
i32::MAX, got {max_offset}"
-            ))
-        })?;
-
-        if max_offset > max_valid_offset {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinaryArray error: value size {value_size} * length 
{len} exceeds maximum valid offset of {max_valid_offset}"
-            )));
-        };
-        Ok(())
-    }
-
     /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values 
are null
     ///
     /// # Panics
@@ -209,26 +179,28 @@ impl FixedSizeBinaryArray {
     ///
     /// * `value_length < 0`
     /// * `value_length * len` would overflow `usize`
-    /// * `value_length * len > i32::MAX`
     /// * `value_length * len * 8` would overflow `usize`
     pub fn new_null(value_length: i32, len: usize) -> Self {
         const BITS_IN_A_BYTE: usize = 8;
         let value_size = value_length.to_usize().unwrap();
-        Self::validate_lengths(value_size, len).unwrap();
         let capacity_in_bytes = value_size.checked_mul(len).unwrap();
         let capacity_in_bits = 
capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
         Self {
             data_type: DataType::FixedSizeBinary(value_length),
             value_data: MutableBuffer::new_null(capacity_in_bits).into(),
             nulls: Some(NullBuffer::new_null(len)),
-            value_length,
+            value_size,
             len,
         }
     }
 
     /// Deconstruct this array into its constituent parts
     pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) {
-        (self.value_length, self.value_data, self.nulls)
+        let value_length = self
+            .value_size
+            .try_into()
+            .expect("FixedSizeBinaryArray value length exceeds i32");
+        (value_length, self.value_data, self.nulls)
     }
 
     /// Returns the element at index `i` as a byte slice.
@@ -246,12 +218,9 @@ impl FixedSizeBinaryArray {
             self.len()
         );
         let offset = i + self.offset();
+        let position = self.value_position_at(offset);
         unsafe {
-            let pos = self.value_offset_at(offset);
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(pos as isize),
-                (self.value_offset_at(offset + 1) - pos) as usize,
-            )
+            std::slice::from_raw_parts(self.value_data.as_ptr().add(position), 
self.value_size)
         }
     }
 
@@ -266,21 +235,32 @@ impl FixedSizeBinaryArray {
     /// of the array
     pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
         let offset = i + self.offset();
-        let pos = self.value_offset_at(offset);
+        let position = self.value_position_at(offset);
         unsafe {
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(pos as isize),
-                (self.value_offset_at(offset + 1) - pos) as usize,
-            )
+            std::slice::from_raw_parts(self.value_data.as_ptr().add(position), 
self.value_size)
         }
     }
 
     /// Returns the offset for the element at index `i`.
     ///
     /// Note this doesn't do any bound checking, for performance reason.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the computed byte offset exceeds `i32::MAX`.
     #[inline]
     pub fn value_offset(&self, i: usize) -> i32 {
-        self.value_offset_at(self.offset() + i)
+        let offset = self
+            .offset()
+            .checked_add(i)
+            .expect("FixedSizeBinaryArray value offset index overflow");
+        let position = self
+            .value_size
+            .checked_mul(offset)
+            .expect("FixedSizeBinaryArray value offset overflow");
+        position
+            .try_into()
+            .expect("FixedSizeBinaryArray value offset exceeds i32")
     }
 
     /// Returns the length for an element.
@@ -288,7 +268,9 @@ impl FixedSizeBinaryArray {
     /// All elements have the same length as the array is a fixed size.
     #[inline]
     pub fn value_length(&self) -> i32 {
-        self.value_length
+        self.value_size
+            .try_into()
+            .expect("FixedSizeBinaryArray value length exceeds i32")
     }
 
     /// Returns the values of this array.
@@ -312,13 +294,13 @@ impl FixedSizeBinaryArray {
             "the length + offset of the sliced FixedSizeBinaryArray cannot 
exceed the existing length"
         );
 
-        let size = self.value_length as usize;
-
         Self {
             data_type: self.data_type.clone(),
             nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
-            value_length: self.value_length,
-            value_data: self.value_data.slice_with_length(offset * size, len * 
size),
+            value_size: self.value_size,
+            value_data: self
+                .value_data
+                .slice_with_length(self.value_position_at(offset), 
self.value_position_at(len)),
             len,
         }
     }
@@ -420,7 +402,6 @@ impl FixedSizeBinaryArray {
         let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
 
         let value_size = value_size.unwrap_or(0);
-        Self::validate_lengths(value_size, len)?;
         let value_length = value_size.try_into().map_err(|_| {
             ArrowError::InvalidArgumentError(format!(
                 "FixedSizeBinaryArray value length exceeds i32, got 
{value_size}"
@@ -430,7 +411,7 @@ impl FixedSizeBinaryArray {
             data_type: DataType::FixedSizeBinary(value_length),
             value_data: buffer.into(),
             nulls,
-            value_length,
+            value_size,
             len,
         })
     }
@@ -514,14 +495,13 @@ impl FixedSizeBinaryArray {
         })?;
 
         let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
-        Self::validate_lengths(value_size, len)?;
 
         Ok(Self {
             data_type: DataType::FixedSizeBinary(value_length),
             value_data: buffer.into(),
             nulls,
             len,
-            value_length,
+            value_size,
         })
     }
 
@@ -583,7 +563,6 @@ impl FixedSizeBinaryArray {
         }
 
         let value_size = value_size.unwrap_or(0);
-        Self::validate_lengths(value_size, len)?;
         let value_length = value_size.try_into().map_err(|_| {
             ArrowError::InvalidArgumentError(format!(
                 "FixedSizeBinaryArray value length exceeds i32, got 
{value_size}"
@@ -593,14 +572,14 @@ impl FixedSizeBinaryArray {
             data_type: DataType::FixedSizeBinary(value_length),
             value_data: buffer.into(),
             nulls: None,
-            value_length,
+            value_size,
             len,
         })
     }
 
     #[inline]
-    fn value_offset_at(&self, i: usize) -> i32 {
-        self.value_length * i as i32
+    fn value_position_at(&self, i: usize) -> usize {
+        self.value_size * i
     }
 
     /// constructs a new iterator
@@ -626,8 +605,6 @@ impl From<ArrayData> for FixedSizeBinaryArray {
         let value_size = value_length
             .to_usize()
             .expect("FixedSizeBinaryArray value length must be non-negative");
-        Self::validate_lengths(value_size, len)
-            .expect("FixedSizeBinaryArray offsets must fit within i32");
         let value_data = buffers[0].slice_with_length(
             offset.checked_mul(value_size).expect("offset overflow"),
             len.checked_mul(value_size).expect("length overflow"),
@@ -638,7 +615,7 @@ impl From<ArrayData> for FixedSizeBinaryArray {
             nulls,
             len,
             value_data,
-            value_length,
+            value_size,
         }
     }
 }
@@ -1017,7 +994,7 @@ mod tests {
 
     #[test]
     fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
-        let input_arg = vec![None, None, None, None, None] as 
Vec<Option<Vec<u8>>>;
+        let input_arg: Vec<Option<Vec<u8>>> = vec![None, None, None, None, 
None];
 
         let arr = 
FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
             .unwrap();
@@ -1086,7 +1063,7 @@ mod tests {
 
     #[test]
     fn fixed_size_binary_array_all_null() {
-        let data = vec![None] as Vec<Option<String>>;
+        let data: Vec<Option<String>> = vec![None];
         let array =
             
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 
0).unwrap();
         array
@@ -1123,23 +1100,10 @@ mod tests {
     }
 
     #[test]
-    fn test_validate_lengths_allows_empty_array() {
-        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();
-    }
-
-    #[test]
-    fn test_validate_lengths_allows_i32_max_offset() {
-        FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap();
-        FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap();
-    }
-
-    #[test]
-    fn test_validate_lengths_rejects_offset_past_i32_max() {
-        let err = FixedSizeBinaryArray::validate_lengths(262_177, 
8192).unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Invalid argument error: FixedSizeBinaryArray error: value size 
262177 * length 8192 exceeds maximum valid offset of 2147483647",
-        );
+    #[should_panic(expected = "FixedSizeBinaryArray value offset exceeds i32")]
+    fn test_value_offset_panics_when_offset_exceeds_i32() {
+        let array = FixedSizeBinaryArray::new(1, Buffer::from_vec(vec![0_u8; 
1]), None);
+        let _ = array.value_offset((i32::MAX as usize) + 1);
     }
 
     #[test]

Reply via email to