sdf-jkl commented on code in PR #18363:
URL: https://github.com/apache/datafusion/pull/18363#discussion_r2475106083


##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -203,11 +232,11 @@ pub fn flatten_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
 
 // Create new offsets that are equivalent to `flatten` the array.
 fn get_offsets_for_flatten<O: OffsetSizeTrait>(
-    offsets: OffsetBuffer<O>,
-    indexes: OffsetBuffer<O>,
+    inner_offsets: OffsetBuffer<O>,
+    outer_offsets: OffsetBuffer<O>,

Review Comment:
   A little naming change because the previous one was confusing.



##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -179,7 +208,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> 
{
                     Ok(Arc::new(flattened_array) as ArrayRef)
                 }
                 LargeList(_) => {
-                    let (inner_field, inner_offsets, inner_values, nulls) =
+                    let (inner_field, inner_offsets, inner_values, _) = // _ 
instead of nulls?

Review Comment:
   Here `flattened_array` was generated using the inner nulls instead of the 
outer ones. I figured it should follow the same format and use nulls from the 
outer array.



##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -216,17 +245,47 @@ fn get_offsets_for_flatten<O: OffsetSizeTrait>(
 
 // Create new large offsets that are equivalent to `flatten` the array.
 fn get_large_offsets_for_flatten<O: OffsetSizeTrait, P: OffsetSizeTrait>(
-    offsets: OffsetBuffer<O>,
-    indexes: OffsetBuffer<P>,
+    inner_offsets: OffsetBuffer<O>,
+    outer_offsets: OffsetBuffer<P>,
 ) -> OffsetBuffer<i64> {
-    let buffer = offsets.into_inner();
-    let offsets: Vec<i64> = indexes
+    let buffer = inner_offsets.into_inner();
+    let offsets: Vec<i64> = outer_offsets
         .iter()
         .map(|i| buffer[i.to_usize().unwrap()].to_i64().unwrap())
         .collect();
     OffsetBuffer::new(offsets.into())
 }
 
+// Function for converting LargeList offsets into List offsets
+fn downcast_i64_inner_to_i32(
+    inner_offsets: &OffsetBuffer<i64>,
+    outer_offsets: &OffsetBuffer<i32>,
+) -> Result<OffsetBuffer<i32>, ArrowError> {
+    let buffer = inner_offsets.clone().into_inner();
+    let offsets: Result<Vec<i32>, _> = outer_offsets
+        .iter()
+        .map(|i| buffer[i.to_usize().unwrap()])
+        .map(|i| {
+            i32::try_from(i)
+                .map_err(|_| ArrowError::CastError(format!("Cannot downcast 
offset {i}")))
+        })
+        .collect();
+    Ok(OffsetBuffer::new(offsets?.into()))
+}
+
+// In case the conversion fails we convert the outer offsets into i64
+fn keep_offsets_i64(

Review Comment:
   This function takes the outer `i32` and inner `i64` offsets and keeps the 
inner `i64`



##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -154,7 +158,32 @@ pub fn flatten_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
                     Ok(Arc::new(flattened_array) as ArrayRef)
                 }
                 LargeList(_) => {
-                    exec_err!("flatten does not support type '{:?}'", 
array.data_type())?
+                    let (inner_field, inner_offsets, inner_values, _) =
+                        as_large_list_array(&values)?.clone().into_parts();
+                    // Try to downcast the inner offsets to i32
+                    match downcast_i64_inner_to_i32(&inner_offsets, &offsets) {
+                        Ok(i32offsets) => {
+                            let flattened_array = GenericListArray::<i32>::new(
+                                inner_field,
+                                i32offsets,
+                                inner_values,
+                                nulls,
+                            );
+                            Ok(Arc::new(flattened_array) as ArrayRef)
+                        }
+                        // If downcast fails we keep the offsets as is
+                        Err(_) => {
+                            // Fallback: keep i64 offsets → LargeList<i64>
+                            let i64offsets = keep_offsets_i64(inner_offsets, 
offsets);
+                            let flattened_array = GenericListArray::<i64>::new(
+                                inner_field,
+                                i64offsets,
+                                inner_values,
+                                nulls,
+                            );
+                            Ok(Arc::new(flattened_array) as ArrayRef)

Review Comment:
   Here we are trying to downcast the inner array offsets from `i64` to `i32` 
and if fail we fallback to i64 offsets.
   The fallback is not yet supported by the `return_type`



##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -95,7 +97,9 @@ impl ScalarUDFImpl for Flatten {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         let data_type = match &arg_types[0] {
             List(field) => match field.data_type() {
-                List(field) | FixedSizeList(field, _) => 
List(Arc::clone(field)),
+                List(field) | LargeList(field) | FixedSizeList(field, _) => {
+                    List(Arc::clone(field))

Review Comment:
   Currently this only supports arrays that can be converted from `LargeList` 
to `List`.



##########
datafusion/functions-nested/src/flatten.rs:
##########
@@ -216,17 +245,47 @@ fn get_offsets_for_flatten<O: OffsetSizeTrait>(
 
 // Create new large offsets that are equivalent to `flatten` the array.
 fn get_large_offsets_for_flatten<O: OffsetSizeTrait, P: OffsetSizeTrait>(
-    offsets: OffsetBuffer<O>,
-    indexes: OffsetBuffer<P>,
+    inner_offsets: OffsetBuffer<O>,
+    outer_offsets: OffsetBuffer<P>,
 ) -> OffsetBuffer<i64> {
-    let buffer = offsets.into_inner();
-    let offsets: Vec<i64> = indexes
+    let buffer = inner_offsets.into_inner();
+    let offsets: Vec<i64> = outer_offsets
         .iter()
         .map(|i| buffer[i.to_usize().unwrap()].to_i64().unwrap())
         .collect();
     OffsetBuffer::new(offsets.into())
 }
 
+// Function for converting LargeList offsets into List offsets
+fn downcast_i64_inner_to_i32(
+    inner_offsets: &OffsetBuffer<i64>,
+    outer_offsets: &OffsetBuffer<i32>,
+) -> Result<OffsetBuffer<i32>, ArrowError> {
+    let buffer = inner_offsets.clone().into_inner();
+    let offsets: Result<Vec<i32>, _> = outer_offsets
+        .iter()
+        .map(|i| buffer[i.to_usize().unwrap()])
+        .map(|i| {
+            i32::try_from(i)
+                .map_err(|_| ArrowError::CastError(format!("Cannot downcast 
offset {i}")))
+        })
+        .collect();
+    Ok(OffsetBuffer::new(offsets?.into()))
+}

Review Comment:
   This function is trying to downcast offsets to `i32`.
   - On success it returns an `OffsetBuffer<i32>`
   - If it fails it errors out.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to