vegarsti commented on code in PR #18432:
URL: https://github.com/apache/datafusion/pull/18432#discussion_r2484242278


##########
datafusion/functions-nested/src/extract.rs:
##########
@@ -1028,4 +1194,71 @@ mod tests {
             fixed_size_list_type
         );
     }
+
+    #[test]
+    fn test_array_slice_list_view_basic() -> Result<()> {
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+        let offsets = ScalarBuffer::from(vec![0, 3]);
+        let sizes = ScalarBuffer::from(vec![3, 2]);
+        let field = Arc::new(Field::new("item", DataType::Int32, true));
+        let array = ListViewArray::new(field, offsets, sizes, values, None);
+
+        let from = Int64Array::from(vec![2, 1]);
+        let to = Int64Array::from(vec![3, 2]);
+
+        let result = general_list_view_array_slice::<i32>(
+            &array,
+            &from,
+            &to,
+            None::<&Int64Array>,
+        )?;
+        let result = result.as_ref().as_list_view::<i32>();
+
+        assert_eq!(list_view_values(result), vec![vec![2, 3], vec![4, 5]]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_array_slice_list_view_non_monotonic_offsets() -> Result<()> {
+        // First list references the tail of the values buffer, second list references the head.
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+        let offsets = ScalarBuffer::from(vec![3, 0]);
+        let sizes = ScalarBuffer::from(vec![2, 3]);
+        let field = Arc::new(Field::new("item", DataType::Int32, true));
+        let array = ListViewArray::new(field, offsets, sizes, values, None);
+
+        let from = Int64Array::from(vec![1, 1]);
+        let to = Int64Array::from(vec![2, 2]);
+
+        let result = general_list_view_array_slice::<i32>(
+            &array,
+            &from,
+            &to,
+            None::<&Int64Array>,
+        )?;
+        let result = result.as_ref().as_list_view::<i32>();
+
+        assert_eq!(list_view_values(result), vec![vec![4, 5], vec![1, 2]]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_array_slice_list_view_negative_stride() -> Result<()> {
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+        let offsets = ScalarBuffer::from(vec![0, 3]);
+        let sizes = ScalarBuffer::from(vec![3, 2]);
+        let field = Arc::new(Field::new("item", DataType::Int32, true));
+        let array = ListViewArray::new(field, offsets, sizes, values, None);
+
+        let from = Int64Array::from(vec![3, 2]);
+        let to = Int64Array::from(vec![1, 1]);
+        let stride = Int64Array::from(vec![-1, -1]);
+
+        let result =
+            general_list_view_array_slice::<i32>(&array, &from, &to, 
Some(&stride))?;
+        let result = result.as_ref().as_list_view::<i32>();
+
+        assert_eq!(list_view_values(result), vec![vec![3, 2, 1], vec![5, 4]]);
+        Ok(())
+    }

Review Comment:
   If you want a test for out-of-order offsets, here's one I wrote to verify the behavior myself:
   
   ```rust
   
       #[test]
       fn test_array_slice_list_view_out_of_order() -> Result<()> {
           let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 
5]));
           let offsets = ScalarBuffer::from(vec![3, 1, 0]);
           let sizes = ScalarBuffer::from(vec![2, 2, 1]);
           let field = Arc::new(Field::new("item", DataType::Int32, true));
           let array = ListViewArray::new(field, offsets, sizes, values, None);
           assert_eq!(
               list_view_values(&array),
               vec![vec![4, 5], vec![2, 3], vec![1]]
           );
   
           let from = Int64Array::from(vec![2, 2, 2]);
           let to = Int64Array::from(vec![1, 1, 1]);
           let stride = Int64Array::from(vec![-1, -1, -1]);
   
           let result =
               general_list_view_array_slice::<i32>(&array, &from, &to, 
Some(&stride))?;
           let result = result.as_ref().as_list_view::<i32>();
   
           assert_eq!(
               list_view_values(result),
               vec![vec![5, 4], vec![3, 2], vec![]]
           );
           Ok(())
       }
   ```



##########
datafusion/functions-nested/src/extract.rs:
##########
@@ -644,6 +691,109 @@ where
     )?))
 }
 
+fn general_list_view_array_slice<O: OffsetSizeTrait>(
+    array: &GenericListViewArray<O>,
+    from_array: &Int64Array,
+    to_array: &Int64Array,
+    stride: Option<&Int64Array>,
+) -> Result<ArrayRef>
+where
+    i64: TryInto<O>,
+{
+    let values = array.values();
+    let original_data = values.to_data();
+    let capacity = Capacities::Array(original_data.len());
+
+    let mut mutable =
+        MutableArrayData::with_capacities(vec![&original_data], true, 
capacity);
+
+    // We must build `offsets` and `sizes` buffers manually as ListView does not enforce
+    // monotonically increasing offsets.
+    let mut offsets = Vec::with_capacity(array.len());
+    let mut sizes = Vec::with_capacity(array.len());
+    let mut current_offset = O::usize_as(0);
+    let mut null_builder = NullBufferBuilder::new(array.len());
+
+    for row_index in 0..array.len() {
+        // Propagate NULL semantics: any NULL input yields a NULL output slot.
+        if array.is_null(row_index)
+            || from_array.is_null(row_index)
+            || to_array.is_null(row_index)
+        {
+            null_builder.append_null();
+            offsets.push(current_offset);
+            sizes.push(O::usize_as(0));
+            continue;
+        }
+        null_builder.append_non_null();
+
+        let len = array.value_size(row_index);
+
+        // Empty arrays always return an empty array.
+        if len == O::usize_as(0) {
+            offsets.push(current_offset);
+            sizes.push(O::usize_as(0));
+            continue;
+        }
+
+        let slice_plan = compute_slice_plan::<O>(
+            len,
+            from_array.value(row_index),
+            to_array.value(row_index),
+            stride.map(|s| s.value(row_index)),
+        )?;
+
+        let start = array.value_offset(row_index);
+        match slice_plan {
+            SlicePlan::Empty => {
+                offsets.push(current_offset);
+                sizes.push(O::usize_as(0));
+            }
+            SlicePlan::Contiguous {
+                start: rel_start,
+                len: slice_len,
+            } => {
+                let start_index = (start + rel_start).to_usize().unwrap();
+                let end_index = (start + rel_start + 
slice_len).to_usize().unwrap();
+                mutable.extend(0, start_index, end_index);
+                offsets.push(current_offset);
+                sizes.push(slice_len);
+                current_offset += slice_len;
+            }
+            SlicePlan::Indices(indices) => {
+                let count = indices.len();
+                for rel_index in indices {
+                    let absolute_index = (start + 
rel_index).to_usize().unwrap();
+                    mutable.extend(0, absolute_index, absolute_index + 1);
+                }
+                let length = O::usize_as(count);
+                offsets.push(current_offset);
+                sizes.push(length);
+                current_offset += length;
+            }
+        }
+    }
+
+    let data = mutable.freeze();
+    let field = match array.data_type() {
+        ListView(field) | LargeListView(field) => Arc::clone(field),
+        other => {
+            return Err(internal_datafusion_err!(
+                "array_slice got unexpected data type: {}",
+                other
+            ));
+        }
+    };

Review Comment:
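   Suggest moving this `array.data_type()` match (which extracts `field` and rejects unexpected types) to before the row loop, so an unexpected data type returns the error up front instead of after all the slicing work and `mutable.freeze()` have already been done.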



##########
datafusion/functions-nested/src/extract.rs:
##########
@@ -1028,4 +1194,71 @@ mod tests {
             fixed_size_list_type
         );
     }
+
+    #[test]

Review Comment:
   Nice tests! The PR that added reverse support for `FixedSizeList` also added sqllogictest cases:
   https://github.com/apache/datafusion/pull/16423/files#diff-317c67cc9ce87268e4ccec1cb75316eed82f99ae2ffc226874e5897913ffa4c8
   
   It doesn't look like that is currently possible for `ListView` on arrow-rs 57, though: it hits the data type parser in `arrow-schema`'s `datatype_parse.rs`, which did not have a branch for `ListView` at the time of that release:
   https://github.com/apache/arrow-rs/blob/062d766a9c3070d191d1a1fd0baca01b9d13994f/arrow-schema/src/datatype_parse.rs#L70-L96



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to