alamb commented on code in PR #9872:
URL: https://github.com/apache/arrow-rs/pull/9872#discussion_r3175017344
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -410,13 +459,14 @@ impl FixedSizeBinaryArray {
})?;
let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
+ Self::validate_lengths(value_size, len)?;
Review Comment:
new call to validate_lengths
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -368,17 +406,28 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices
don't match.
- pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) ->
Result<Self, ArrowError>
+ pub fn try_from_sparse_iter_with_size<T, U>(
+ mut iter: T,
+ value_length: i32,
Review Comment:
renamed parameter for consistency
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -368,17 +406,28 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices
don't match.
- pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) ->
Result<Self, ArrowError>
+ pub fn try_from_sparse_iter_with_size<T, U>(
+ mut iter: T,
+ value_length: i32,
+ ) -> Result<Self, ArrowError>
where
T: Iterator<Item = Option<U>>,
U: AsRef<[u8]>,
{
+ let value_size = value_length.to_usize().ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {value_length}"))
+ })?;
let mut len = 0;
let mut byte = 0;
let iter_size_hint = iter.size_hint().0;
let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint,
8));
- let mut buffer = MutableBuffer::new(iter_size_hint * (size as usize));
+ let capacity = iter_size_hint.checked_mul(value_size).ok_or_else(|| {
Review Comment:
used checked multiplication rather than straight up *
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -120,31 +120,63 @@ impl FixedSizeBinaryArray {
}
};
+ Self::validate_lengths(value_size, len)?;
Review Comment:
This is an actual change: to ensure offset calculations will not overflow
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -120,31 +120,63 @@ impl FixedSizeBinaryArray {
}
};
+ Self::validate_lengths(value_size, len)?;
+
Ok(Self {
data_type,
value_data: values,
- value_length: size,
+ value_length,
nulls,
len,
})
}
+ /// Some calculations below use i32 arithmetic which can overflow when
+ /// valid offsets are past i32::MAX. Until that is solved for real do not
+ /// permit constructing any FixedSizeBinaryArray that has a valid offset
+ /// past i32::MAX
+ fn validate_lengths(value_size: usize, len: usize) -> Result<(),
ArrowError> {
+ if len == 0 {
+ return Ok(());
+ }
+ let max_offset = value_size.checked_mul(len - 1).ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "FixedSizeBinaryArray error: value size {value_size} * len
{len} exceeds maximum valid offset"
+ ))
+ })?;
+
+ let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
+ ArrowError::InvalidArgumentError(format!(
+ "FixedSizeBinaryArray error: maximum valid offset exceeds
i32::MAX, got {max_offset}"
+ ))
+ })?;
+
+ if max_offset > max_valid_offset {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "FixedSizeBinaryArray error: value size {value_size} * length
{len} exceeds maximum valid offset of {max_valid_offset}"
+ )));
+ };
+ Ok(())
+ }
+
/// Create a new [`FixedSizeBinaryArray`] of length `len` where all values
are null
///
/// # Panics
///
/// Panics if
///
- /// * `size < 0`
- /// * `size * len` would overflow `usize`
- pub fn new_null(size: i32, len: usize) -> Self {
+ /// * `value_length < 0`
+ /// * `value_length * len` would overflow `usize`
+ pub fn new_null(value_length: i32, len: usize) -> Self {
const BITS_IN_A_BYTE: usize = 8;
- let capacity_in_bytes =
size.to_usize().unwrap().checked_mul(len).unwrap();
+ let value_size = value_length.to_usize().unwrap();
+ Self::validate_lengths(value_size, len).unwrap();
Review Comment:
new call to validate_lengths here
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -511,8 +567,15 @@ impl From<ArrayData> for FixedSizeBinaryArray {
_ => panic!("Expected data type to be FixedSizeBinary"),
};
- let size = value_length as usize;
Review Comment:
here is another unchecked conversion from value_lengh to usize
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -64,8 +64,8 @@ impl FixedSizeBinaryArray {
/// # Panics
///
/// Panics if [`Self::try_new`] returns an error
- pub fn new(size: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
- Self::try_new(size, values, nulls).unwrap()
+ pub fn new(value_length: i32, values: Buffer, nulls: Option<NullBuffer>)
-> Self {
Review Comment:
I also renamed the parameters from `size` to `value_length` to be consistent
with the field name, and make it clear that this is the size of each element in
the array (not the number of elements in the array)
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -368,17 +406,28 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices
don't match.
- pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) ->
Result<Self, ArrowError>
+ pub fn try_from_sparse_iter_with_size<T, U>(
+ mut iter: T,
+ value_length: i32,
+ ) -> Result<Self, ArrowError>
where
T: Iterator<Item = Option<U>>,
U: AsRef<[u8]>,
{
+ let value_size = value_length.to_usize().ok_or_else(|| {
Review Comment:
this is a new check that `value_length` can be converted to usize without
truncation / wrap (aka that value_length is not negative)
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -476,12 +526,18 @@ impl FixedSizeBinaryArray {
));
}
- let size = size.unwrap_or(0).try_into().unwrap();
+ let value_size = size.unwrap_or(0);
+ Self::validate_lengths(value_size, len)?;
Review Comment:
validate_lengths here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]