Jefffrey commented on code in PR #20049:
URL: https://github.com/apache/datafusion/pull/20049#discussion_r2740509922


##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -243,56 +240,75 @@ fn general_list_repeat<O: OffsetSizeTrait>(
     list_array: &GenericListArray<O>,
     count_array: &UInt64Array,
 ) -> Result<ArrayRef> {
-    let data_type = list_array.data_type();
-    let value_type = list_array.value_type();
-    let mut new_values = vec![];
+    let counts = count_array.values();

Review Comment:
   What happens if an element of `count_array` is null? It looks like we still 
access the underlying primitive?



##########
datafusion/functions-nested/src/repeat.rs:
##########
@@ -243,56 +240,75 @@ fn general_list_repeat<O: OffsetSizeTrait>(
     list_array: &GenericListArray<O>,
     count_array: &UInt64Array,
 ) -> Result<ArrayRef> {
-    let data_type = list_array.data_type();
-    let value_type = list_array.value_type();
-    let mut new_values = vec![];
+    let counts = count_array.values();
+    let list_offsets = list_array.value_offsets();
 
-    let count_vec = count_array
-        .values()
-        .to_vec()
+    // calculate capacities for pre-allocation
+    let outer_total = counts.iter().map(|&c| c as usize).sum();
+    let inner_total = counts
         .iter()
-        .map(|x| *x as usize)
-        .collect::<Vec<_>>();
-
-    for (list_array_row, &count) in list_array.iter().zip(count_vec.iter()) {
-        let list_arr = match list_array_row {
-            Some(list_array_row) => {
-                let original_data = list_array_row.to_data();
-                let capacity = Capacities::Array(original_data.len() * count);
-                let mut mutable = MutableArrayData::with_capacities(
-                    vec![&original_data],
-                    false,
-                    capacity,
-                );
-
-                for _ in 0..count {
-                    mutable.extend(0, 0, original_data.len());
-                }
-
-                let data = mutable.freeze();
-                let repeated_array = arrow::array::make_array(data);
-
-                let list_arr = GenericListArray::<O>::try_new(
-                    Arc::new(Field::new_list_field(value_type.clone(), true)),
-                    OffsetBuffer::<O>::from_lengths(vec![original_data.len(); 
count]),
-                    repeated_array,
-                    None,
-                )?;
-                Arc::new(list_arr) as ArrayRef
-            }
-            None => new_null_array(data_type, count),
-        };
-        new_values.push(list_arr);
+        .enumerate()
+        .filter(|&(i, _)| !list_array.is_null(i))
+        .map(|(i, &c)| {
+            let len = list_offsets[i + 1].to_usize().unwrap()
+                - list_offsets[i].to_usize().unwrap();
+            len * (c as usize)
+        })
+        .sum();
+
+    // Build outer offsets
+    let mut outer_offsets = Vec::with_capacity(counts.len() + 1);
+    outer_offsets.push(O::zero());
+    let mut running_offset = 0usize;
+    for &count in counts.iter() {

Review Comment:
   Can use 
[`OffsetBuffer::from_lengths`](https://docs.rs/arrow/latest/arrow/buffer/struct.OffsetBuffer.html#method.from_lengths)
 here I believe?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to