This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch 56_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/56_maintenance by this push:
     new aeb648c1b2 [56_maintenance] Prevent buffer builder length overflow in 
MutableBuffer::extend_zeros (#9820) (#9915)
aeb648c1b2 is described below

commit aeb648c1b24a7bb8add937b01b083344438e3a77
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 6 10:54:06 2026 -0400

    [56_maintenance] Prevent buffer builder length overflow in 
MutableBuffer::extend_zeros (#9820) (#9915)
    
    - Part of https://github.com/apache/arrow-rs/issues/9857
    - Fixes https://github.com/apache/arrow-rs/issues/9897 in 56.x releases
    
    This PR:
    - Backports https://github.com/apache/arrow-rs/pull/9820 from @alamb to
    the `56_maintenance` line
---
 arrow-buffer/src/buffer/mutable.rs | 95 +++++++++++++++++++++++++++++++++++---
 arrow-buffer/src/builder/mod.rs    | 24 ++++++++++
 2 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/arrow-buffer/src/buffer/mutable.rs 
b/arrow-buffer/src/buffer/mutable.rs
index 63fdbf598b..31bb520714 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -72,6 +72,10 @@ impl MutableBuffer {
     /// Allocate a new [MutableBuffer] with initial capacity to be at least 
`capacity`.
     ///
     /// See [`MutableBuffer::with_capacity`].
+    ///
+    /// # Panics
+    ///
+    /// See [`MutableBuffer::with_capacity`].
     #[inline]
     pub fn new(capacity: usize) -> Self {
         Self::with_capacity(capacity)
@@ -107,6 +111,7 @@ impl MutableBuffer {
 
     /// Allocates a new [MutableBuffer] with `len` and capacity to be at least 
`len` where
     /// all bytes are guaranteed to be `0u8`.
+    ///
     /// # Example
     /// ```
     /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
@@ -116,6 +121,10 @@ impl MutableBuffer {
     /// let data = buffer.as_slice_mut();
     /// assert_eq!(data[126], 0u8);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is too large to construct a valid allocation [`Layout`]
     pub fn from_len_zeroed(len: usize) -> Self {
         let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
         let data = match layout.size() {
@@ -159,6 +168,10 @@ impl MutableBuffer {
 
     /// creates a new [MutableBuffer] with capacity and length capable of 
holding `len` bits.
     /// This is useful to create a buffer for packed bitmaps.
+    ///
+    /// # Panics
+    ///
+    /// See [`MutableBuffer::from_len_zeroed`].
     pub fn new_null(len: usize) -> Self {
         let num_bytes = bit_util::ceil(len, 8);
         MutableBuffer::from_len_zeroed(num_bytes)
@@ -170,6 +183,10 @@ impl MutableBuffer {
     /// This is useful when one wants to clear (or set) the bits and then 
manipulate
     /// the buffer directly (e.g., modifying the buffer by holding a mutable 
reference
     /// from `data_mut()`).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `end` exceeds the buffer capacity.
     pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
         assert!(end <= self.layout.size());
         let v = if val { 255 } else { 0 };
@@ -185,6 +202,10 @@ impl MutableBuffer {
     /// This is used to initialize the bits in a buffer, however, it has no 
impact on the
     /// `len` of the buffer and so can be used to initialize the memory region 
from
     /// `len` to `capacity`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the byte range `start..start + count` exceeds the buffer 
capacity.
     pub fn set_null_bits(&mut self, start: usize, count: usize) {
         assert!(
             start.saturating_add(count) <= self.layout.size(),
@@ -210,11 +231,19 @@ impl MutableBuffer {
     /// let buffer: Buffer = buffer.into();
     /// assert_eq!(buffer.len(), 253);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.len + additional` overflows `usize`, or if the 
required capacity is too
+    /// large to round up to the next 64-byte boundary and construct a valid 
allocation layout.
     // For performance reasons, this must be inlined so that the `if` is 
executed inside the caller, and not as an extra call that just
     // exits.
     #[inline(always)]
     pub fn reserve(&mut self, additional: usize) {
-        let required_cap = self.len + additional;
+        let required_cap = self
+            .len
+            .checked_add(additional)
+            .expect("buffer length overflow");
         if required_cap > self.layout.size() {
             let new_capacity = 
bit_util::round_upto_multiple_of_64(required_cap);
             let new_capacity = std::cmp::max(new_capacity, self.layout.size() 
* 2);
@@ -276,6 +305,11 @@ impl MutableBuffer {
     /// buffer.resize(253, 2); // allocates for the first time
     /// assert_eq!(buffer.as_slice()[252], 2u8);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if growing the buffer requires reserving a capacity that fails 
for the same
+    /// reasons as [`MutableBuffer::reserve`].
     // For performance reasons, this must be inlined so that the `if` is 
executed inside the caller, and not as an extra call that just
     // exits.
     #[inline(always)]
@@ -311,6 +345,11 @@ impl MutableBuffer {
     /// buffer.shrink_to_fit();
     /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the current length is too large to round up to the next 
64-byte boundary and
+    /// construct a valid allocation layout.
     pub fn shrink_to_fit(&mut self) {
         let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
         if new_capacity < self.layout.size() {
@@ -384,8 +423,8 @@ impl MutableBuffer {
     ///
     /// # Panics
     ///
-    /// This function panics if the underlying buffer is not aligned
-    /// correctly for type `T`.
+    /// This function panics if the underlying buffer is not aligned correctly 
for type `T`, or
+    /// if its length is not a multiple of `size_of::<T>()`.
     pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
         // SAFETY
         // ArrowNativeType is trivially transmutable, is sealed to prevent 
potentially incorrect
@@ -399,8 +438,8 @@ impl MutableBuffer {
     ///
     /// # Panics
     ///
-    /// This function panics if the underlying buffer is not aligned
-    /// correctly for type `T`.
+    /// This function panics if the underlying buffer is not aligned correctly 
for type `T`, or
+    /// if its length is not a multiple of `size_of::<T>()`.
     pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
         // SAFETY
         // ArrowNativeType is trivially transmutable, is sealed to prevent 
potentially incorrect
@@ -418,6 +457,11 @@ impl MutableBuffer {
     /// buffer.extend_from_slice(&[2u32, 0]);
     /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if extending the buffer requires reserving a capacity that 
fails for the same
+    /// reasons as [`MutableBuffer::reserve`].
     #[inline]
     pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
         let additional = mem::size_of_val(items);
@@ -441,6 +485,11 @@ impl MutableBuffer {
     /// buffer.push(256u32);
     /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if extending the buffer requires reserving a capacity that 
fails for the same
+    /// reasons as [`MutableBuffer::reserve`].
     #[inline]
     pub fn push<T: ToByteSlice>(&mut self, item: T) {
         let additional = std::mem::size_of::<T>();
@@ -466,13 +515,26 @@ impl MutableBuffer {
     }
 
     /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing 
its capacity if needed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.len + additional` overflows `usize`, or if growing the 
buffer requires
+    /// reserving a capacity that fails for the same reasons as 
[`MutableBuffer::reserve`].
     #[inline]
     pub fn extend_zeros(&mut self, additional: usize) {
-        self.resize(self.len + additional, 0);
+        let new_len = self
+            .len
+            .checked_add(additional)
+            .expect("buffer length overflow");
+        self.resize(new_len, 0);
     }
 
     /// # Safety
     /// The caller must ensure that the buffer was properly initialized up to 
`len`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` exceeds the buffer capacity.
     #[inline]
     pub unsafe fn set_len(&mut self, len: usize) {
         assert!(len <= self.capacity());
@@ -618,6 +680,13 @@ impl MutableBuffer {
     /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
     /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if the
+    /// reported length does not match the number of items produced, or if 
allocating the
+    /// required buffer fails for the same reasons as [`MutableBuffer::new`].
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
@@ -662,6 +731,12 @@ impl MutableBuffer {
     /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) 
};
     /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if it yields
+    /// fewer items than reported.
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
@@ -680,6 +755,14 @@ impl MutableBuffer {
     /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted 
(upper) length or errors
     /// if any of the items of the iterator is an error.
     /// Prefer this to `collect` whenever possible, as it is faster ~60% 
faster.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if the
+    /// reported length does not match the number of items produced before an 
error-free finish,
+    /// or if allocating the required buffer fails for the same reasons as
+    /// [`MutableBuffer::new`].
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index abe510bdab..d58e4b7a09 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -418,4 +418,28 @@ mod tests {
         builder.extend([3, 4]);
         assert_eq!(builder.len(), 4);
     }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn reserve_length_overflow() {
+        let mut builder = BufferBuilder::<u8>::new(1);
+        builder.append(0);
+        builder.reserve(usize::MAX);
+    }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn append_n_zeroed_length_overflow() {
+        let mut builder = BufferBuilder::<u64>::new(1);
+        builder.append_n_zeroed(1);
+        builder.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
+    }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn advance_length_overflow() {
+        let mut builder = BufferBuilder::<u64>::new(1);
+        builder.advance(1);
+        builder.advance(usize::MAX / mem::size_of::<u64>());
+    }
 }

Reply via email to