This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 8091f3f17b [arrow-array] Use consistent `value_length` name in
FixedSizeBinaryArray (#9905)
8091f3f17b is described below
commit 8091f3f17b2de355f7c47e7a0907000d308f8f3e
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue May 5 15:08:06 2026 -0400
[arrow-array] Use consistent `value_length` name in FixedSizeBinaryArray
(#9905)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/9906
- First follow on to https://github.com/apache/arrow-rs/pull/9872
# Rationale for this change
While trying to avoid overflows due to using i32 arithmetic in
FixedSizeBinaryArray, I found the use of the term `size` in parameters
to be confusing when the field name is called `value_length`
# What changes are included in this PR?
Change several parameter / variable names to `value_length` to keep the
code consistent
# Are these changes tested?
By CI
# Are there any user-facing changes?
No this is an internal code refactor
---
arrow-array/src/array/fixed_size_binary_array.rs | 150 ++++++++++++-----------
1 file changed, 79 insertions(+), 71 deletions(-)
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs
b/arrow-array/src/array/fixed_size_binary_array.rs
index 3c3dc8f589..a54496addc 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -96,44 +96,48 @@ pub struct FixedSizeBinaryArray {
}
impl FixedSizeBinaryArray {
- /// Create a new [`FixedSizeBinaryArray`] with `size` element size,
panicking on failure
+ /// Create a new [`FixedSizeBinaryArray`] with `value_length` bytes per
element, panicking on
+ /// failure
///
/// # Panics
///
/// Panics if [`Self::try_new`] returns an error
- pub fn new(size: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
- Self::try_new(size, values, nulls).unwrap()
+ pub fn new(value_length: i32, values: Buffer, nulls: Option<NullBuffer>)
-> Self {
+ Self::try_new(value_length, values, nulls).unwrap()
}
/// Create a new [`Scalar`] from `value`
pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
let v = value.as_ref();
- let size = i32::try_from(v.len()).expect("FixedSizeBinaryArray value
length exceeds i32");
- Scalar::new(Self::new(size, Buffer::from(v), None))
+ let value_length =
+ i32::try_from(v.len()).expect("FixedSizeBinaryArray value length
exceeds i32");
+ Scalar::new(Self::new(value_length, Buffer::from(v), None))
}
/// Create a new [`FixedSizeBinaryArray`] from the provided parts,
returning an error on failure
///
- /// Creating an arrow with `size == 0` will try to get the length from the
null buffer. If
- /// no null buffer is provided, the resulting array will have length zero.
+ /// Creating an array with `value_length == 0` will try to get the length
from the null
+ /// buffer. If no null buffer is provided, the resulting array will have
length zero.
///
/// # Errors
///
- /// * `size < 0`
- /// * `values.len() / size != nulls.len()`
- /// * `size == 0 && values.len() != 0`
- /// * `len * size > i32::MAX`
+ /// * `value_length < 0`
+ /// * `values.len() / value_length != nulls.len()`
+ /// * `value_length == 0 && values.len() != 0`
+ /// * `len * value_length > i32::MAX`
pub fn try_new(
- size: i32,
+ value_length: i32,
values: Buffer,
nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
- let data_type = DataType::FixedSizeBinary(size);
- let s = size.to_usize().ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {size}"))
+ let data_type = DataType::FixedSizeBinary(value_length);
+ let value_size = value_length.to_usize().ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Value length cannot be negative, got {value_length}"
+ ))
})?;
- let len = match values.len().checked_div(s) {
+ let len = match values.len().checked_div(value_size) {
Some(len) => {
if let Some(n) = nulls.as_ref() {
if n.len() != len {
@@ -150,21 +154,21 @@ impl FixedSizeBinaryArray {
None => {
if !values.is_empty() {
return Err(ArrowError::InvalidArgumentError(
- "Buffer cannot have non-zero length if the item size
is zero".to_owned(),
+ "Buffer cannot have non-zero length if the value
length is zero".to_owned(),
));
}
- // If the item size is zero, try to determine the length from
the null buffer
+ // If the value length is zero, try to determine the length
from the null buffer
nulls.as_ref().map(|n| n.len()).unwrap_or(0)
}
};
- Self::validate_lengths(s, len)?;
+ Self::validate_lengths(value_size, len)?;
Ok(Self {
data_type,
value_data: values,
- value_length: size,
+ value_length,
nulls,
len,
})
@@ -203,21 +207,21 @@ impl FixedSizeBinaryArray {
///
/// Panics if
///
- /// * `size < 0`
- /// * `size * len` would overflow `usize`
- /// * `size * len > i32::MAX`
- /// * `size * len * 8` would overflow `usize`
- pub fn new_null(size: i32, len: usize) -> Self {
+ /// * `value_length < 0`
+ /// * `value_length * len` would overflow `usize`
+ /// * `value_length * len > i32::MAX`
+ /// * `value_length * len * 8` would overflow `usize`
+ pub fn new_null(value_length: i32, len: usize) -> Self {
const BITS_IN_A_BYTE: usize = 8;
- let size_usize = size.to_usize().unwrap();
- Self::validate_lengths(size_usize, len).unwrap();
- let capacity_in_bytes = size_usize.checked_mul(len).unwrap();
+ let value_size = value_length.to_usize().unwrap();
+ Self::validate_lengths(value_size, len).unwrap();
+ let capacity_in_bytes = value_size.checked_mul(len).unwrap();
let capacity_in_bits =
capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
Self {
- data_type: DataType::FixedSizeBinary(size),
+ data_type: DataType::FixedSizeBinary(value_length),
value_data: MutableBuffer::new_null(capacity_in_bits).into(),
nulls: Some(NullBuffer::new_null(len)),
- value_length: size,
+ value_length,
len,
}
}
@@ -351,7 +355,7 @@ impl FixedSizeBinaryArray {
U: AsRef<[u8]>,
{
let mut len = 0;
- let mut size = None;
+ let mut value_size = None;
let mut byte = 0;
let iter_size_hint = iter.size_hint().0;
@@ -369,7 +373,7 @@ impl FixedSizeBinaryArray {
if let Some(slice) = item {
let slice = slice.as_ref();
- if let Some(size) = size {
+ if let Some(size) = value_size {
if size != slice.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Nested array size mismatch: one is {}, and the
other is {}",
@@ -379,7 +383,7 @@ impl FixedSizeBinaryArray {
}
} else {
let len = slice.len();
- size = Some(len);
+ value_size = Some(len);
// Now that we know how large each element is we can
reserve
// sufficient capacity in the underlying mutable buffer for
// the data.
@@ -396,7 +400,7 @@ impl FixedSizeBinaryArray {
}
bit_util::set_bit(null_buf.as_slice_mut(), len);
buffer.extend_from_slice(slice);
- } else if let Some(size) = size {
+ } else if let Some(size) = value_size {
buffer.extend_zeros(size);
} else {
prepend += 1;
@@ -415,18 +419,18 @@ impl FixedSizeBinaryArray {
let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
- let size = size.unwrap_or(0);
- Self::validate_lengths(size, len)?;
- let size = size.try_into().map_err(|_| {
+ let value_size = value_size.unwrap_or(0);
+ Self::validate_lengths(value_size, len)?;
+ let value_length = value_size.try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
- "FixedSizeBinaryArray value length exceeds i32, got {size}"
+ "FixedSizeBinaryArray value length exceeds i32, got
{value_size}"
))
})?;
Ok(Self {
- data_type: DataType::FixedSizeBinary(size),
+ data_type: DataType::FixedSizeBinary(value_length),
value_data: buffer.into(),
nulls,
- value_length: size,
+ value_length,
len,
})
}
@@ -434,8 +438,8 @@ impl FixedSizeBinaryArray {
/// Create an array from an iterable argument of sparse byte slices.
/// Sparsity means that items returned by the iterator are optional, i.e
input argument can
/// contain `None` items. In cases where the iterator returns only `None`
values, this
- /// also takes a size parameter to ensure that the a valid
FixedSizeBinaryArray is still
- /// created.
+ /// also takes a `value_length` parameter to ensure that a valid
+ /// [`FixedSizeBinaryArray`] is still created.
///
/// # Examples
///
@@ -455,22 +459,27 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices
don't match.
- pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) ->
Result<Self, ArrowError>
+ pub fn try_from_sparse_iter_with_size<T, U>(
+ mut iter: T,
+ value_length: i32,
+ ) -> Result<Self, ArrowError>
where
T: Iterator<Item = Option<U>>,
U: AsRef<[u8]>,
{
- let size_usize = size.to_usize().ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!("Size cannot be negative,
got {size}"))
+ let value_size = value_length.to_usize().ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Value length cannot be negative, got {value_length}"
+ ))
})?;
let mut len = 0;
let mut byte = 0;
let iter_size_hint = iter.size_hint().0;
let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint,
8));
- let capacity = iter_size_hint.checked_mul(size_usize).ok_or_else(|| {
+ let capacity = iter_size_hint.checked_mul(value_size).ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
- "FixedSizeBinaryArray error: value size {size_usize} * len
hint {iter_size_hint} exceeds usize"
+ "FixedSizeBinaryArray error: value size {value_size} * len
hint {iter_size_hint} exceeds usize"
))
})?;
let mut buffer = MutableBuffer::new(capacity);
@@ -485,10 +494,10 @@ impl FixedSizeBinaryArray {
if let Some(slice) = item {
let slice = slice.as_ref();
- if size_usize != slice.len() {
+ if value_size != slice.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Nested array size mismatch: one is {}, and the other
is {}",
- size,
+ value_length,
slice.len()
)));
}
@@ -496,7 +505,7 @@ impl FixedSizeBinaryArray {
bit_util::set_bit(null_buf.as_slice_mut(), len);
buffer.extend_from_slice(slice);
} else {
- buffer.extend_zeros(size_usize);
+ buffer.extend_zeros(value_size);
}
len += 1;
@@ -505,14 +514,14 @@ impl FixedSizeBinaryArray {
})?;
let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
- Self::validate_lengths(size_usize, len)?;
+ Self::validate_lengths(value_size, len)?;
Ok(Self {
- data_type: DataType::FixedSizeBinary(size),
+ data_type: DataType::FixedSizeBinary(value_length),
value_data: buffer.into(),
nulls,
len,
- value_length: size,
+ value_length,
})
}
@@ -539,23 +548,22 @@ impl FixedSizeBinaryArray {
U: AsRef<[u8]>,
{
let mut len = 0;
- let mut size = None;
+ let mut value_size = None;
let iter_size_hint = iter.size_hint().0;
let mut buffer = MutableBuffer::new(0);
iter.try_for_each(|item| -> Result<(), ArrowError> {
let slice = item.as_ref();
- if let Some(size) = size {
- if size != slice.len() {
+ if let Some(value_size) = value_size {
+ if value_size != slice.len() {
return Err(ArrowError::InvalidArgumentError(format!(
- "Nested array size mismatch: one is {}, and the other
is {}",
- size,
+ "Nested array size mismatch: one is {value_size}, and
the other is {}",
slice.len()
)));
}
} else {
let len = slice.len();
- size = Some(len);
+ value_size = Some(len);
if let Some(capacity) = iter_size_hint.checked_mul(len) {
buffer.reserve(capacity);
}
@@ -574,18 +582,18 @@ impl FixedSizeBinaryArray {
));
}
- let size = size.unwrap_or(0);
- Self::validate_lengths(size, len)?;
- let size = size.try_into().map_err(|_| {
+ let value_size = value_size.unwrap_or(0);
+ Self::validate_lengths(value_size, len)?;
+ let value_length = value_size.try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
- "FixedSizeBinaryArray value length exceeds i32, got {size}"
+ "FixedSizeBinaryArray value length exceeds i32, got
{value_size}"
))
})?;
Ok(Self {
- data_type: DataType::FixedSizeBinary(size),
+ data_type: DataType::FixedSizeBinary(value_length),
value_data: buffer.into(),
nulls: None,
- value_length: size,
+ value_length,
len,
})
}
@@ -615,14 +623,14 @@ impl From<ArrayData> for FixedSizeBinaryArray {
_ => panic!("Expected data type to be FixedSizeBinary"),
};
- let size = value_length
+ let value_size = value_length
.to_usize()
.expect("FixedSizeBinaryArray value length must be non-negative");
- Self::validate_lengths(size, len)
+ Self::validate_lengths(value_size, len)
.expect("FixedSizeBinaryArray offsets must fit within i32");
let value_data = buffers[0].slice_with_length(
- offset.checked_mul(size).expect("offset overflow"),
- len.checked_mul(size).expect("length overflow"),
+ offset.checked_mul(value_size).expect("offset overflow"),
+ len.checked_mul(value_size).expect("length overflow"),
);
Self {
@@ -1157,7 +1165,7 @@ mod tests {
assert_eq!(
err.to_string(),
- "Invalid argument error: Size cannot be negative, got -1"
+ "Invalid argument error: Value length cannot be negative, got -1"
);
let nulls = NullBuffer::new_null(3);
@@ -1182,7 +1190,7 @@ mod tests {
FixedSizeBinaryArray::try_new(0, buffer, None).unwrap_err();
assert_eq!(
zero_sized_with_non_empty_buffer_err.to_string(),
- "Invalid argument error: Buffer cannot have non-zero length if the
item size is zero"
+ "Invalid argument error: Buffer cannot have non-zero length if the
value length is zero"
);
}
}