rluvaton commented on code in PR #21679:
URL: https://github.com/apache/datafusion/pull/21679#discussion_r3110264385


##########
datafusion/common/src/utils/mod.rs:
##########
@@ -976,11 +983,176 @@ pub fn take_function_args<const N: usize, T>(
     })
 }
 
+/// Returns the inner values of a list, or an error otherwise
+/// For [`ListArray`] and [`LargeListArray`], if it's sliced, it returns a
+/// sliced array too. Therefore, to reconstruct a list using it,
+/// you must adjust the offsets using [`adjust_offsets_for_slice`]
+pub fn list_values(array: &dyn Array) -> Result<ArrayRef> {
+    match array.data_type() {
+        DataType::List(_) => Ok(sliced_list_values(array.as_list::<i32>())),
+        DataType::LargeList(_) => 
Ok(sliced_list_values(array.as_list::<i64>())),
+        DataType::FixedSizeList(_, _) => {
+            Ok(Arc::clone(array.as_fixed_size_list().values()))
+        }
+        other => _exec_err!("expected list, got {other}"),
+    }
+}
+
+fn sliced_list_values<O: OffsetSizeTrait>(list: &GenericListArray<O>) -> 
ArrayRef {
+    let values = list.values();
+    let offsets = list.offsets();
+
+    if let (Some(first), Some(last)) = (offsets.first(), offsets.last()) {
+        let first = first.as_usize();
+        let last = last.as_usize();
+
+        if first != 0 || last != values.len() {
+            return values.slice(first, last - first);
+        }
+    }
+
+    Arc::clone(values)
+}
+
+/// If `list` is sliced, returns an adjusted offset buffer so that
+/// it points to the sliced portion of the list values, and not the whole list values
+pub fn adjust_offsets_for_slice<O: OffsetSizeTrait>(
+    list: &GenericListArray<O>,
+) -> OffsetBuffer<O> {
+    let offsets = list.offsets();
+
+    if let (Some(first), Some(last)) = (offsets.first(), offsets.last())
+        && (!first.is_zero() || last.as_usize() != list.values().len())
+    {
+        let offsets = offsets.iter().map(|offset| *offset - *first).collect();
+
+        //todo: use unsafe Offset::new_unchecked?
+        return OffsetBuffer::new(offsets);
+    }
+
+    offsets.clone()
+}
+
+/// For lists and large lists, truncates the sublist of null values
+///
+/// For fixed size lists, if there's any valid value, replace all null values with it,
+/// otherwise return the array unchanged
+pub fn remove_list_null_values(array: &ArrayRef) -> Result<ArrayRef> {
+    // todo: handle list view and map
+    match array.data_type() {
+        DataType::List(_) => 
Ok(Arc::new(truncate_list_nulls(array.as_list::<i32>())?)),
+        DataType::LargeList(_) => {
+            Ok(Arc::new(truncate_list_nulls(array.as_list::<i64>())?))
+        }
+        DataType::FixedSizeList(_, _) => replace_nulls_with_first_valid(array),

Review Comment:
   This can cause unintended behavior: for example, if the lambda expression
   contains a random function or any other stateful expression, that state will
   be mutated more than needed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to