This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 479ad5b844 Prevent Rows row index overflow (#9817)
479ad5b844 is described below
commit 479ad5b8445977624543e5004711c5c583c1a4fe
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Apr 25 11:20:29 2026 -0400
Prevent Rows row index overflow (#9817)
# Which issue does this PR close?
- None.
# Rationale for this change
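Rows used unchecked usize arithmetic when validating a requested row
index. In optimized builds, where integer overflow checks are disabled
by default, `row + 1` wraps around to 0 when `row` is `usize::MAX`, so the
bounds check could pass and the invalid index would reach the unchecked
row access path.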
# What changes are included in this PR?
This adds checked arithmetic for row index validation and reuses it for
both Rows::row and Rows::row_len.
# Are these changes tested?
Yes. This adds regression coverage for overflowing row indexes.
# Are there any user-facing changes?
Invalid row indexes that overflow during bounds validation now panic
consistently. There are no API changes.
---
arrow-row/src/lib.rs | 32 ++++++++++++++++++++++++++++----
1 file changed, 28 insertions(+), 4 deletions(-)
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 078c457477..dcf81562a0 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1255,14 +1255,20 @@ impl Rows {
/// Returns the row at index `row`
pub fn row(&self, row: usize) -> Row<'_> {
- assert!(row + 1 < self.offsets.len());
+ self.checked_row_end(row);
unsafe { self.row_unchecked(row) }
}
+ fn checked_row_end(&self, row: usize) -> usize {
+ row.checked_add(1)
+ .filter(|end| *end < self.offsets.len())
+ .expect("row index out of bounds")
+ }
+
/// Returns the row at `index` without bounds checking
///
/// # Safety
- /// Caller must ensure that `index` is less than the number of offsets (#rows + 1)
+ /// Caller must ensure that `index + 1` is less than the number of offsets (#rows + 1)
pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
let end = unsafe { self.offsets.get_unchecked(index + 1) };
let start = unsafe { self.offsets.get_unchecked(index) };
@@ -1276,9 +1282,9 @@ impl Rows {
/// Returns the number of bytes the row at index `row` is occupying,
/// that is, what is the length of the returned [`Row::data`] will be.
pub fn row_len(&self, row: usize) -> usize {
- assert!(row + 1 < self.offsets.len());
+ let end = self.checked_row_end(row);
- self.offsets[row + 1] - self.offsets[row]
+ self.offsets[end] - self.offsets[row]
}
/// Get an iterator over the lengths of each row in this [`Rows`]
@@ -5404,6 +5410,24 @@ mod tests {
assert_eq!(lengths_iter.next(), None);
}
+ #[test]
+ #[should_panic(expected = "row index out of bounds")]
+ fn row_should_panic_on_overflowing_index() {
+ let rows = RowConverter::new(vec![SortField::new(DataType::Int32)])
+ .unwrap()
+ .empty_rows(0, 0);
+ rows.row(usize::MAX);
+ }
+
+ #[test]
+ #[should_panic(expected = "row index out of bounds")]
+ fn row_len_should_panic_on_overflowing_index() {
+ let rows = RowConverter::new(vec![SortField::new(DataType::Int32)])
+ .unwrap()
+ .empty_rows(0, 0);
+ rows.row_len(usize::MAX);
+ }
+
#[test]
fn test_nested_null_list() {
let null_array = Arc::new(NullArray::new(3));