This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 479ad5b844 Prevent Rows row index overflow (#9817)
479ad5b844 is described below

commit 479ad5b8445977624543e5004711c5c583c1a4fe
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Apr 25 11:20:29 2026 -0400

    Prevent Rows row index overflow (#9817)
    
    # Which issue does this PR close?
    
    - None.
    
    # Rationale for this change
    
    Rows used unchecked usize arithmetic when validating a requested row
    index. In optimized builds, very large indexes could wrap the bounds
    check before reaching the unchecked row access path.
    
    # What changes are included in this PR?
    
    This adds checked arithmetic for row index validation and reuses it for
    both Rows::row and Rows::row_len.
    
    # Are these changes tested?
    
    Yes. This adds regression coverage for overflowing row indexes.
    
    # Are there any user-facing changes?
    
    Invalid row indexes that overflow during bounds validation now panic
    consistently. There are no API changes.
---
 arrow-row/src/lib.rs | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 078c457477..dcf81562a0 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1255,14 +1255,20 @@ impl Rows {
 
     /// Returns the row at index `row`
     pub fn row(&self, row: usize) -> Row<'_> {
-        assert!(row + 1 < self.offsets.len());
+        self.checked_row_end(row);
         unsafe { self.row_unchecked(row) }
     }
 
+    fn checked_row_end(&self, row: usize) -> usize {
+        row.checked_add(1)
+            .filter(|end| *end < self.offsets.len())
+            .expect("row index out of bounds")
+    }
+
     /// Returns the row at `index` without bounds checking
     ///
     /// # Safety
-    /// Caller must ensure that `index` is less than the number of offsets (#rows + 1)
+    /// Caller must ensure that `index + 1` is less than the number of offsets (#rows + 1)
     pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
         let end = unsafe { self.offsets.get_unchecked(index + 1) };
         let start = unsafe { self.offsets.get_unchecked(index) };
@@ -1276,9 +1282,9 @@ impl Rows {
     /// Returns the number of bytes the row at index `row` is occupying,
     /// that is, what is the length of the returned [`Row::data`] will be.
     pub fn row_len(&self, row: usize) -> usize {
-        assert!(row + 1 < self.offsets.len());
+        let end = self.checked_row_end(row);
 
-        self.offsets[row + 1] - self.offsets[row]
+        self.offsets[end] - self.offsets[row]
     }
 
     /// Get an iterator over the lengths of each row in this [`Rows`]
@@ -5404,6 +5410,24 @@ mod tests {
         assert_eq!(lengths_iter.next(), None);
     }
 
+    #[test]
+    #[should_panic(expected = "row index out of bounds")]
+    fn row_should_panic_on_overflowing_index() {
+        let rows = RowConverter::new(vec![SortField::new(DataType::Int32)])
+            .unwrap()
+            .empty_rows(0, 0);
+        rows.row(usize::MAX);
+    }
+
+    #[test]
+    #[should_panic(expected = "row index out of bounds")]
+    fn row_len_should_panic_on_overflowing_index() {
+        let rows = RowConverter::new(vec![SortField::new(DataType::Int32)])
+            .unwrap()
+            .empty_rows(0, 0);
+        rows.row_len(usize::MAX);
+    }
+
     #[test]
     fn test_nested_null_list() {
         let null_array = Arc::new(NullArray::new(3));

Reply via email to