vegarsti commented on code in PR #9562:
URL: https://github.com/apache/arrow-rs/pull/9562#discussion_r2939262105


##########
arrow-data/src/transform/list_view.rs:
##########
@@ -27,21 +27,55 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + 
CheckedAdd>(
     let offsets = array.buffer::<T>(0);
     let sizes = array.buffer::<T>(1);
     Box::new(
-        move |mutable: &mut _MutableArrayData, _index: usize, start: usize, 
len: usize| {
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, 
len: usize| {
             let offset_buffer = &mut mutable.buffer1;
             let sizes_buffer = &mut mutable.buffer2;
 
-            for &offset in &offsets[start..start + len] {
-                offset_buffer.push(offset);
-            }
+            // MutableArrayData builds a new independent array, so we must 
copy the child values
+            // that the source elements reference. Since ListView allows a
+            // non-contiguous offset layout we can do this efficiently by 
finding the
+            // bounding child range [child_min, child_max) that covers all 
elements being copied.
+            // We then bulk copy that entire range into the output child 
array, and
+            // remap each element's offset to point to its data at the new
+            // location. Sizes are unchanged.
+            //
+            // The copied range may include unreferenced child values in gaps
+            // between elements, but that is fine — ListView allows 
non-contiguous
+            // offsets, so the (offset, size) pairs precisely identify each
+            // element's data regardless of what else sits in the child array.
 
-            // sizes
-            for &size in &sizes[start..start + len] {
-                sizes_buffer.push(size);
+            // Find the bounding child range
+            let mut child_min = usize::MAX;
+            let mut child_max = 0usize;
+            for i in start..start + len {
+                let size = sizes[i].as_usize();
+                if size > 0 {
+                    let offset = offsets[i].as_usize();
+                    child_min = child_min.min(offset);
+                    child_max = child_max.max(offset + size);
+                }
             }
 
-            // the beauty of views is that we don't need to copy child_data, 
we just splat
-            // the offsets and sizes.
+            // Copy the child range
+            let child_base = if child_max > child_min {
+                let base = mutable.child_data[0].len();
+                mutable.child_data[0].extend(index, child_min, child_max);

Review Comment:
   Do you mean `extend_with_slice`?



##########
arrow-data/src/transform/list_view.rs:
##########
@@ -27,21 +27,55 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + 
CheckedAdd>(
     let offsets = array.buffer::<T>(0);
     let sizes = array.buffer::<T>(1);
     Box::new(
-        move |mutable: &mut _MutableArrayData, _index: usize, start: usize, 
len: usize| {
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, 
len: usize| {
             let offset_buffer = &mut mutable.buffer1;
             let sizes_buffer = &mut mutable.buffer2;
 
-            for &offset in &offsets[start..start + len] {
-                offset_buffer.push(offset);
-            }
+            // MutableArrayData builds a new independent array, so we must 
copy the child values
+            // that the source elements reference. Since ListView allows a
+            // non-contiguous offset layout we can do this efficiently by 
finding the
+            // bounding child range [child_min, child_max) that covers all 
elements being copied.
+            // We then bulk copy that entire range into the output child 
array, and
+            // remap each element's offset to point to its data at the new
+            // location. Sizes are unchanged.
+            //
+            // The copied range may include unreferenced child values in gaps
+            // between elements, but that is fine — ListView allows 
non-contiguous
+            // offsets, so the (offset, size) pairs precisely identify each
+            // element's data regardless of what else sits in the child array.
 
-            // sizes
-            for &size in &sizes[start..start + len] {
-                sizes_buffer.push(size);
+            // Find the bounding child range
+            let mut child_min = usize::MAX;
+            let mut child_max = 0usize;
+            for i in start..start + len {
+                let size = sizes[i].as_usize();
+                if size > 0 {
+                    let offset = offsets[i].as_usize();
+                    child_min = child_min.min(offset);
+                    child_max = child_max.max(offset + size);
+                }
             }
 
-            // the beauty of views is that we don't need to copy child_data, 
we just splat
-            // the offsets and sizes.
+            // Copy the child range
+            let child_base = if child_max > child_min {
+                let base = mutable.child_data[0].len();
+                mutable.child_data[0].extend(index, child_min, child_max);

Review Comment:
   Are you referring to `extend_with_slice`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to