cfmcgrady commented on code in PR #2643:
URL: https://github.com/apache/datafusion-comet/pull/2643#discussion_r2544098080


##########
native/spark-expr/src/array_funcs/array_insert.rs:
##########
@@ -198,114 +197,124 @@ fn array_insert<O: OffsetSizeTrait>(
     pos_array: &ArrayRef,
     legacy_mode: bool,
 ) -> DataFusionResult<ColumnarValue> {
-    // The code is based on the implementation of the array_append from the 
Apache DataFusion
-    // 
https://github.com/apache/datafusion/blob/main/datafusion/functions-nested/src/concat.rs#L513
-    //
-    // This code is also based on the implementation of the array_insert from 
the Apache Spark
-    // 
https://github.com/apache/spark/blob/branch-3.5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L4713
+    // Implementation aligned with Arrow's half-open offset ranges and Spark 
semantics.
 
     let values = list_array.values();
     let offsets = list_array.offsets();
     let values_data = values.to_data();
     let item_data = items_array.to_data();
+
+    // Estimate capacity (original values + inserted items upper bound)
     let new_capacity = Capacities::Array(values_data.len() + item_data.len());
 
     let mut mutable_values =
         MutableArrayData::with_capacities(vec![&values_data, &item_data], 
true, new_capacity);
 
-    let mut new_offsets = vec![O::usize_as(0)];
-    let mut new_nulls = Vec::<bool>::with_capacity(list_array.len());
+    // New offsets and top-level list validity bitmap
+    let mut new_offsets = Vec::with_capacity(list_array.len() + 1);
+    new_offsets.push(O::usize_as(0));
+    let mut list_valid = Vec::<bool>::with_capacity(list_array.len());
 
-    let pos_data: &Int32Array = as_primitive_array(&pos_array); // Spark 
supports only i32 for positions
+    // Spark supports only Int32 position indices
+    let pos_data: &Int32Array = as_primitive_array(&pos_array);
 
-    for (row_index, offset_window) in offsets.windows(2).enumerate() {
-        let pos = pos_data.values()[row_index];
-        let start = offset_window[0].as_usize();
-        let end = offset_window[1].as_usize();
-        let is_item_null = items_array.is_null(row_index);
+    for (row_index, window) in offsets.windows(2).enumerate() {
+        let start = window[0].as_usize();
+        let end = window[1].as_usize();
+        let len = end - start;
+        let pos = pos_data.value(row_index);

Review Comment:
   Addressed: the function now returns null for the entire row when `pos` is null.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to