Jefffrey commented on code in PR #9710:
URL: https://github.com/apache/arrow-rs/pull/9710#discussion_r3460404504


##########
arrow-data/src/transform/mod.rs:
##########
@@ -716,36 +721,95 @@ impl<'a> MutableArrayData<'a> {
         }
     }
 
-    /// Extends the in progress array with a region of the input arrays
+    /// Extends the in progress array with a region of the input arrays, 
returning an error on
+    /// overflow.
     ///
     /// # Arguments
-    /// * `index` - the index of array that you what to copy values from
+    /// * `index` - the index of array that you want to copy values from
     /// * `start` - the start index of the chunk (inclusive)
     /// * `end` - the end index of the chunk (exclusive)
     ///
+    /// # Errors
+    /// Returns an error if offset arithmetic overflows the underlying integer 
type.
+    ///
     /// # Panic
     /// This function panics if there is an invalid index,
     /// i.e. `index` >= the number of source arrays
     /// or `end` > the length of the `index`th array
-    pub fn extend(&mut self, index: usize, start: usize, end: usize) {
+    pub fn try_extend(&mut self, index: usize, start: usize, end: usize) -> 
Result<(), ArrowError> {
         let len = end - start;
         (self.extend_null_bits[index])(&mut self.data, start, len);
-        (self.extend_values[index])(&mut self.data, index, start, len);
+        // Snapshot buffer lengths before attempting the extend so we can roll
+        // back to a consistent state if it fails.
+        let buf1_len = self.data.buffer1.len();
+        let buf2_len = self.data.buffer2.len();
+        if let Err(e) = (self.extend_values[index])(&mut self.data, index, 
start, len) {
+            // Restore buffers to their pre-call lengths so the array remains
+            // in a valid state for the caller to inspect or retry.
+            self.data.buffer1.truncate(buf1_len);
+            self.data.buffer2.truncate(buf2_len);
+            return Err(e);
+        }
         self.data.len += len;
+        Ok(())
     }
 
-    /// Extends the in progress array with null elements, ignoring the input 
arrays.
+    /// Extends the in progress array with a region of the input arrays.
+    ///
+    /// # Deprecated

Review Comment:
   we probably dont need to restate the deprecation in the docstring; the 
attribute is sufficient



##########
arrow-data/src/transform/structure.rs:
##########
@@ -17,21 +17,45 @@
 
 use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
+use arrow_schema::{ArrowError, DataType};
 
 pub(super) fn build_extend(_: &ArrayData) -> Extend<'_> {
     Box::new(
         move |mutable: &mut _MutableArrayData, index: usize, start: usize, 
len: usize| {
-            mutable
-                .child_data
-                .iter_mut()
-                .for_each(|child| child.extend(index, start, start + len))
+            // Collect field names before the mutable borrow of child_data.
+            let field_names = struct_field_names(&mutable.data_type);

Review Comment:
   this seems like a spot of concern, if we're always gathering the names for 
each invocation here (especially as this gathering has allocations)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to