jayzhan211 commented on code in PR #7897:
URL: https://github.com/apache/arrow-datafusion/pull/7897#discussion_r1379462447


##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1478,6 +1481,69 @@ macro_rules! to_string {
     }};
 }
 
+fn deduplicate_array(arg:ArrayRef) -> Result<ArrayRef> {
+    let list_arr = as_list_array(&arg)?;
+    let row_number = list_arr.len();
+    for i in 0..row_number {
+        let arr = list_arr.value(i);
+        let i64arr = as_primitive_array::<Int64Type>(&arr);
+        for v in i64arr.iter() {
+            // v is Option<i64>
+        }
+    }
+    let row_converter = RowConverter::new(vec![
+        SortField::new(
+            arg.data_type().clone()
+        )
+    ]
+    )?;
+    let converted = row_converter.convert_columns(&[arg])?;
+    let mut distinct_rows = row_converter.empty_rows(converted.num_rows(), 
converted.size());
+    let mut dedup: HashSet<Row> = HashSet::with_capacity(converted.num_rows());
+    converted.iter().filter(|row| dedup.insert(*row)).for_each(|row| 
distinct_rows.push(row));
+    let dedup =  row_converter.convert_rows(&distinct_rows)?;
+    let res =  make_array(dedup.as_slice())?;
+    
+    Ok(res)
+}
+
+
+/// Array_union SQL function
+pub fn array_union(args: &[ArrayRef]) -> Result<ArrayRef> {
+    if args.len() != 2 {
+        return exec_err!("array_union needs two arguments")
+    }
+    let array1 = &args[0];
+    let array2= &args[1];
+
+    check_datatypes("array_union", &[array1, array2])?;
+    let list1 = as_list_array(array1)?;
+    let list2 = as_list_array(array2)?;
+    
+    match (list1.value_type(), list2.value_type()){
+        (DataType::Null, _) => {
+            Ok(array2.clone())
+        },
+        (_, DataType::Null) => {
+            Ok(array1.clone())
+        }
+        (DataType::List(_), DataType::List(_)) => {
+            let result = concat_internal(args)?;
+            deduplicate_array(result)
+        },
+        // These are the same as confirmed from check_datatypes
+        (_data_type1, _data_type_2) => {
+            eprintln!("Array1 {:?}",array1);
+            eprintln!("Array2 {:?}",array2);
+            let arrays = vec![array1.as_ref(), array2.as_ref()];
+            let result = arrow::compute::concat(arrays.as_slice())?;
+            eprintln!("Result {:?}",result);
+            deduplicate_array(result)

Review Comment:
   I don't know how to reproduce your error. Here is the code as I see it:
   
   ```rust
   fn deduplicate_array(arg:ArrayRef) -> Result<ArrayRef> {
       let list_arr = as_list_array(&arg)?;
       let row_number = list_arr.len();
       for i in 0..row_number {
           let arr = list_arr.value(i);
           let i64arr = as_primitive_array::<Int64Type>(&arr);
           for v in i64arr.iter() {
               // v is Option<i64>
           }
       }
       let row_converter = RowConverter::new(vec![
           SortField::new(
               arg.data_type().clone()
           )
       ]
       )?;
       let converted = row_converter.convert_columns(&[arg])?;
       let mut distinct_rows = row_converter.empty_rows(converted.num_rows(), 
converted.size());
       let mut dedup: HashSet<Row> = 
HashSet::with_capacity(converted.num_rows());
       converted.iter().filter(|row| dedup.insert(*row)).for_each(|row| 
distinct_rows.push(row));
       let dedup =  row_converter.convert_rows(&distinct_rows)?;
       let res =  make_array(dedup.as_slice())?;
       
       Ok(res)
   }
   ```
   
   It seems the code is incomplete.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to