Jefffrey commented on code in PR #18137:
URL: https://github.com/apache/datafusion/pull/18137#discussion_r2644932802


##########
datafusion/common/src/utils/array_utils.rs:
##########
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Array concatenation utilities
+
+use std::sync::Arc;
+
+use crate::cast::as_generic_list_array;
+use crate::error::_exec_datafusion_err;
+use crate::utils::list_ndims;
+use crate::Result;
+use arrow::array::{
+    Array, ArrayData, ArrayRef, GenericListArray, NullBufferBuilder, 
OffsetSizeTrait,
+};
+use arrow::buffer::OffsetBuffer;
+use arrow::datatypes::{DataType, Field};
+
+/// Concatenates arrays
+pub fn concat_arrays(args: &[ArrayRef]) -> Result<ArrayRef> {

Review Comment:
   This was pulled into datafusion-common in order to be used across 
datafusion-functions and datafusion-functions-nested? I'd prefer to keep it in 
datafusion-functions, as datafusion-functions-nested already has a dependency 
on that crate.



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -65,13 +71,64 @@ impl Default for ConcatFunc {
 
 impl ConcatFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::variadic(
-                vec![Utf8View, Utf8, LargeUtf8],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
+        }
+    }
+
+    /// Get the string type with highest precedence: Utf8View > LargeUtf8 > 
Utf8
+    fn get_string_type_precedence(&self, arg_types: &[DataType]) -> DataType {
+        use DataType::*;
+
+        for data_type in arg_types {
+            if data_type == &Utf8View {
+                return Utf8View;
+            }
+        }
+
+        for data_type in arg_types {
+            if data_type == &LargeUtf8 {
+                return LargeUtf8;
+            }
+        }
+
+        Utf8
+    }
+
+    /// Concatenate array arguments
+    fn concat_arrays(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.is_empty() {
+            return plan_err!("concat requires at least one argument");
         }
+
+        // Convert ColumnarValue arguments to ArrayRef
+        let arrays: Result<Vec<Arc<dyn Array>>> = args
+            .iter()
+            .map(|arg| match arg {
+                ColumnarValue::Array(arr) => Ok(Arc::clone(arr)),
+                ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(1),
+            })
+            .collect();
+        let arrays = arrays?;
+
+        // Check if all arrays are null - concat errors in this case

Review Comment:
   Why does it error? Is this based on existing behaviour?



##########
datafusion/functions-nested/src/utils.rs:
##########
@@ -267,60 +224,3 @@ pub(crate) fn get_map_entry_field(data_type: &DataType) -> 
Result<&Fields> {
         _ => internal_err!("Expected a Map type, got {data_type}"),
     }
 }
-
-#[cfg(test)]
-mod tests {

Review Comment:
   Were these tests moved as well? I don't see them anywhere



##########
datafusion/sqllogictest/test_files/information_schema.slt:
##########
@@ -770,8 +770,6 @@ datafusion public string_agg 1 OUT NULL String NULL false 1
 query TTTBI rowsort
 select specific_name, data_type, parameter_mode, is_variadic, rid from 
information_schema.parameters where specific_name = 'concat';
 ----

Review Comment:
   This still hasn't been addressed



##########
datafusion/functions-nested/src/concat.rs:
##########
@@ -462,7 +369,8 @@ fn array_append_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
-fn array_prepend_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+/// Array_prepend SQL function
+pub fn array_prepend_inner(args: &[ArrayRef]) -> Result<ArrayRef> {

Review Comment:
   Ditto



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -65,13 +71,64 @@ impl Default for ConcatFunc {
 
 impl ConcatFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::variadic(
-                vec![Utf8View, Utf8, LargeUtf8],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
+        }
+    }
+
+    /// Get the string type with highest precedence: Utf8View > LargeUtf8 > 
Utf8
+    fn get_string_type_precedence(&self, arg_types: &[DataType]) -> DataType {
+        use DataType::*;
+
+        for data_type in arg_types {
+            if data_type == &Utf8View {
+                return Utf8View;
+            }
+        }
+
+        for data_type in arg_types {
+            if data_type == &LargeUtf8 {
+                return LargeUtf8;
+            }
+        }
+
+        Utf8
+    }
+
+    /// Concatenate array arguments
+    fn concat_arrays(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.is_empty() {
+            return plan_err!("concat requires at least one argument");
         }
+
+        // Convert ColumnarValue arguments to ArrayRef
+        let arrays: Result<Vec<Arc<dyn Array>>> = args

Review Comment:
   Use `ColumnarValue::values_to_arrays()`
   
   
https://github.com/apache/datafusion/blob/902d3b32be4de94742cee025c1624b0855bd4f9f/datafusion/expr-common/src/columnar_value.rs#L229-L243



##########
datafusion/functions-nested/src/concat.rs:
##########
@@ -352,107 +351,15 @@ impl ScalarUDFImpl for ArrayConcat {
     }
 }
 
-fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
-    if args.is_empty() {
-        return exec_err!("array_concat expects at least one argument");
-    }
-
-    let mut all_null = true;
-    let mut large_list = false;
-    for arg in args {
-        match arg.data_type() {
-            DataType::Null => continue,
-            DataType::LargeList(_) => large_list = true,
-            _ => (),
-        }
-        if arg.null_count() < arg.len() {
-            all_null = false;
-        }
-    }
-
-    if all_null {
-        // Return a null array with the same type as the first non-null-type 
argument
-        let return_type = args
-            .iter()
-            .map(|arg| arg.data_type())
-            .find_or_first(|d| !d.is_null())
-            .unwrap(); // Safe because args is non-empty
-
-        Ok(arrow::array::make_array(ArrayData::new_null(
-            return_type,
-            args[0].len(),
-        )))
-    } else if large_list {
-        concat_internal::<i64>(args)
-    } else {
-        concat_internal::<i32>(args)
-    }
-}
-
-fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let args = align_array_dimensions::<O>(args.to_vec())?;
-
-    let list_arrays = args
-        .iter()
-        .map(|arg| as_generic_list_array::<O>(arg))
-        .collect::<Result<Vec<_>>>()?;
-    // Assume number of rows is the same for all arrays
-    let row_count = list_arrays[0].len();
-
-    let mut array_lengths = vec![];
-    let mut arrays = vec![];
-    let mut valid = NullBufferBuilder::new(row_count);
-    for i in 0..row_count {
-        let nulls = list_arrays
-            .iter()
-            .map(|arr| arr.is_null(i))
-            .collect::<Vec<_>>();
-
-        // If all the arrays are null, the concatenated array is null
-        let is_null = nulls.iter().all(|&x| x);
-        if is_null {
-            array_lengths.push(0);
-            valid.append_null();
-        } else {
-            // Get all the arrays on i-th row
-            let values = list_arrays
-                .iter()
-                .map(|arr| arr.value(i))
-                .collect::<Vec<_>>();
-
-            let elements = values
-                .iter()
-                .map(|a| a.as_ref())
-                .collect::<Vec<&dyn Array>>();
-
-            // Concatenated array on i-th row
-            let concatenated_array = 
arrow::compute::concat(elements.as_slice())?;
-            array_lengths.push(concatenated_array.len());
-            arrays.push(concatenated_array);
-            valid.append_non_null();
-        }
-    }
-    // Assume all arrays have the same data type
-    let data_type = list_arrays[0].value_type();
-
-    let elements = arrays
-        .iter()
-        .map(|a| a.as_ref())
-        .collect::<Vec<&dyn Array>>();
-
-    let list_arr = GenericListArray::<O>::new(
-        Arc::new(Field::new_list_field(data_type, true)),
-        OffsetBuffer::from_lengths(array_lengths),
-        Arc::new(arrow::compute::concat(elements.as_slice())?),
-        valid.finish(),
-    );
-
-    Ok(Arc::new(list_arr))
+/// Array_concat/Array_cat SQL function
+pub fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+    datafusion_common::utils::array_utils::concat_arrays(args)
 }
 
 // Kernel functions
 
-fn array_append_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+/// Array_append SQL function
+pub fn array_append_inner(args: &[ArrayRef]) -> Result<ArrayRef> {

Review Comment:
   Why are we adding public here?



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -65,13 +71,64 @@ impl Default for ConcatFunc {
 
 impl ConcatFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
-            signature: Signature::variadic(
-                vec![Utf8View, Utf8, LargeUtf8],
-                Volatility::Immutable,
-            ),
+            signature: Signature::user_defined(Volatility::Immutable),
+        }
+    }
+
+    /// Get the string type with highest precedence: Utf8View > LargeUtf8 > 
Utf8

Review Comment:
   What is this precedence logic based on?



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -287,7 +401,12 @@ impl ScalarUDFImpl for ConcatFunc {
     }
 }
 
-pub(crate) fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
+pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {

Review Comment:
   This doesn't need to be pub, pub(crate) was sufficient



##########
datafusion/common/src/utils/array_utils.rs:
##########
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Array concatenation utilities
+
+use std::sync::Arc;
+
+use crate::cast::as_generic_list_array;
+use crate::error::_exec_datafusion_err;
+use crate::utils::list_ndims;
+use crate::Result;
+use arrow::array::{
+    Array, ArrayData, ArrayRef, GenericListArray, NullBufferBuilder, 
OffsetSizeTrait,
+};
+use arrow::buffer::OffsetBuffer;
+use arrow::datatypes::{DataType, Field};
+
+/// Concatenates arrays
+pub fn concat_arrays(args: &[ArrayRef]) -> Result<ArrayRef> {
+    if args.is_empty() {
+        return Err(_exec_datafusion_err!(
+            "concat_arrays expects at least one argument"
+        ));
+    }
+
+    let mut all_null = true;
+    let mut large_list = false;
+    for arg in args {
+        match arg.data_type() {
+            DataType::Null => continue,
+            DataType::LargeList(_) => large_list = true,
+            _ => (),
+        }
+        if arg.null_count() < arg.len() {
+            all_null = false;
+        }
+    }
+
+    if all_null {
+        // Return a null array with the same type as the first non-null-type 
argument
+        let return_type = args
+            .iter()
+            .map(|arg| arg.data_type())
+            .find(|d| !d.is_null())
+            .unwrap_or_else(|| args[0].data_type());

Review Comment:
   Was this meant to be a lift and shift? I'm noticing small differences like 
how this used to be:
   
   ```rust
           let return_type = args
               .iter()
               .map(|arg| arg.data_type())
               .find_or_first(|d| !d.is_null())
               .unwrap(); // Safe because args is non-empty
   ```
   
   Could we call out any refactors that were done as part of the lift and 
shift, otherwise it is hard to spot in review.



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -88,37 +145,91 @@ impl ScalarUDFImpl for ConcatFunc {
         &self.signature
     }
 
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        use DataType::*;
+
+        if arg_types.is_empty() {
+            return plan_err!("concat requires at least one argument");
+        }
+
+        let has_arrays = arg_types
+            .iter()
+            .any(|dt| matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_)));
+        let has_non_arrays = arg_types
+            .iter()
+            .any(|dt| !matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_) | Null));
+
+        if has_arrays && has_non_arrays {
+            return plan_err!(
+                "Cannot mix array and non-array arguments in concat function. \
+                Use concat(array1, array2, ...) for arrays or concat(str1, 
str2, ...) for strings, but not both."

Review Comment:
   ```suggestion
                   "Cannot mix array and non-array arguments in concat function.
   ```
   
   Remove redundant information



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -88,37 +145,91 @@ impl ScalarUDFImpl for ConcatFunc {
         &self.signature
     }
 
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        use DataType::*;
+
+        if arg_types.is_empty() {
+            return plan_err!("concat requires at least one argument");
+        }
+
+        let has_arrays = arg_types
+            .iter()
+            .any(|dt| matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_)));
+        let has_non_arrays = arg_types
+            .iter()
+            .any(|dt| !matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_) | Null));
+
+        if has_arrays && has_non_arrays {
+            return plan_err!(
+                "Cannot mix array and non-array arguments in concat function. \
+                Use concat(array1, array2, ...) for arrays or concat(str1, 
str2, ...) for strings, but not both."
+            );
+        }
+
+        if has_arrays {
+            return Ok(arg_types.to_vec());
+        }
+
+        let target_type = self.get_string_type_precedence(arg_types);
+
+        // Only coerce types that need coercion, keep string types as-is
+        let coerced_types = arg_types
+            .iter()
+            .map(|data_type| match data_type {
+                Utf8View | Utf8 | LargeUtf8 => data_type.clone(),
+                _ => target_type.clone(),
+            })
+            .collect();
+        Ok(coerced_types)
+    }
+
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         use DataType::*;
-        let mut dt = &Utf8;
-        arg_types.iter().for_each(|data_type| {
-            if data_type == &Utf8View {
-                dt = data_type;
-            }
-            if data_type == &LargeUtf8 && dt != &Utf8View {
-                dt = data_type;
+
+        // Check if any argument is an array type
+        for data_type in arg_types {
+            if let List(field) | LargeList(field) | FixedSizeList(field, _) = 
data_type {
+                return Ok(List(Arc::new(arrow::datatypes::Field::new(
+                    "item",
+                    field.data_type().clone(),
+                    true,
+                ))));
             }
-        });
+        }
 
-        Ok(dt.to_owned())
+        // For non-array arguments, return string type based on precedence
+        let dt = self.get_string_type_precedence(arg_types);
+        Ok(dt)
     }
 
     /// Concatenates the text representations of all the arguments. NULL 
arguments are ignored.
     /// concat('abcde', 2, NULL, 22) = 'abcde222'
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+        use DataType::*;
         let ScalarFunctionArgs { args, .. } = args;
 
-        let mut return_datatype = DataType::Utf8;
-        args.iter().for_each(|col| {
-            if col.data_type() == DataType::Utf8View {
-                return_datatype = col.data_type();
-            }
-            if col.data_type() == DataType::LargeUtf8
-                && return_datatype != DataType::Utf8View
-            {
-                return_datatype = col.data_type();
+        if args.is_empty() {
+            return plan_err!("concat requires at least one argument");
+        }
+
+        for arg in &args {
+            let is_array = match arg {
+                ColumnarValue::Array(array) => matches!(
+                    array.data_type(),
+                    List(_) | LargeList(_) | FixedSizeList(_, _)
+                ),
+                ColumnarValue::Scalar(scalar) => matches!(
+                    scalar.data_type(),

Review Comment:
   Similarly, functions should have been coerced to either all arrays or all 
strings, so we should be able to check only the first argument



##########
datafusion/sqllogictest/test_files/spark/string/concat.slt:
##########
@@ -46,3 +46,39 @@ SELECT concat(a, b, c) from (select 'a' a, 'b' b, 'c' c 
union all select null a,
 ----
 abc
 NULL
+
+# Test array concatenation
+query ?
+SELECT concat([1, 2], [3, 4]);
+----
+[1, 2, 3, 4]
+
+query ?
+SELECT concat([1, 2], [3, 4], [5, 6]);
+----
+[1, 2, 3, 4, 5, 6]
+
+# Test array concatenation with nulls - Spark returns NULL if any argument is 
NULL
+query ?
+SELECT concat([1, 2], NULL, [3, 4]);
+----
+NULL
+
+# Test array concatenation with empty arrays - Arrow limitation with Null vs 
Int64 types
+statement error Arrow error: Invalid argument error: It is not possible to 
concatenate arrays of different data types
+SELECT concat([], [1, 2]);
+
+statement error Arrow error: Invalid argument error: It is not possible to 
concatenate arrays of different data types  
+SELECT concat([1, 2], []);

Review Comment:
   This seems like a weird bug to have; perhaps can address in a followup



##########
datafusion/functions-nested/src/concat.rs:
##########
@@ -352,107 +351,15 @@ impl ScalarUDFImpl for ArrayConcat {
     }
 }
 
-fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
-    if args.is_empty() {
-        return exec_err!("array_concat expects at least one argument");
-    }
-
-    let mut all_null = true;
-    let mut large_list = false;
-    for arg in args {
-        match arg.data_type() {
-            DataType::Null => continue,
-            DataType::LargeList(_) => large_list = true,
-            _ => (),
-        }
-        if arg.null_count() < arg.len() {
-            all_null = false;
-        }
-    }
-
-    if all_null {
-        // Return a null array with the same type as the first non-null-type 
argument
-        let return_type = args
-            .iter()
-            .map(|arg| arg.data_type())
-            .find_or_first(|d| !d.is_null())
-            .unwrap(); // Safe because args is non-empty
-
-        Ok(arrow::array::make_array(ArrayData::new_null(
-            return_type,
-            args[0].len(),
-        )))
-    } else if large_list {
-        concat_internal::<i64>(args)
-    } else {
-        concat_internal::<i32>(args)
-    }
-}
-
-fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let args = align_array_dimensions::<O>(args.to_vec())?;
-
-    let list_arrays = args
-        .iter()
-        .map(|arg| as_generic_list_array::<O>(arg))
-        .collect::<Result<Vec<_>>>()?;
-    // Assume number of rows is the same for all arrays
-    let row_count = list_arrays[0].len();
-
-    let mut array_lengths = vec![];
-    let mut arrays = vec![];
-    let mut valid = NullBufferBuilder::new(row_count);
-    for i in 0..row_count {
-        let nulls = list_arrays
-            .iter()
-            .map(|arr| arr.is_null(i))
-            .collect::<Vec<_>>();
-
-        // If all the arrays are null, the concatenated array is null
-        let is_null = nulls.iter().all(|&x| x);
-        if is_null {
-            array_lengths.push(0);
-            valid.append_null();
-        } else {
-            // Get all the arrays on i-th row
-            let values = list_arrays
-                .iter()
-                .map(|arr| arr.value(i))
-                .collect::<Vec<_>>();
-
-            let elements = values
-                .iter()
-                .map(|a| a.as_ref())
-                .collect::<Vec<&dyn Array>>();
-
-            // Concatenated array on i-th row
-            let concatenated_array = 
arrow::compute::concat(elements.as_slice())?;
-            array_lengths.push(concatenated_array.len());
-            arrays.push(concatenated_array);
-            valid.append_non_null();
-        }
-    }
-    // Assume all arrays have the same data type
-    let data_type = list_arrays[0].value_type();
-
-    let elements = arrays
-        .iter()
-        .map(|a| a.as_ref())
-        .collect::<Vec<&dyn Array>>();
-
-    let list_arr = GenericListArray::<O>::new(
-        Arc::new(Field::new_list_field(data_type, true)),
-        OffsetBuffer::from_lengths(array_lengths),
-        Arc::new(arrow::compute::concat(elements.as_slice())?),
-        valid.finish(),
-    );
-
-    Ok(Arc::new(list_arr))
+/// Array_concat/Array_cat SQL function
+pub fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+    datafusion_common::utils::array_utils::concat_arrays(args)

Review Comment:
   Does this need to be public? And do we need this thin wrapper function?



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -302,59 +421,85 @@ pub(crate) fn simplify_concat(args: Vec<Expr>) -> 
Result<ExprSimplifyResult> {
         ConcatFunc::new().return_type(&data_types)
     }?;
 
-    for arg in args.clone() {
+    for arg in args.iter() {
         match arg {
             Expr::Literal(ScalarValue::Utf8(None), _) => {}
-            Expr::Literal(ScalarValue::LargeUtf8(None), _) => {
-            }
-            Expr::Literal(ScalarValue::Utf8View(None), _) => { }
+            Expr::Literal(ScalarValue::LargeUtf8(None), _) => {}
+            Expr::Literal(ScalarValue::Utf8View(None), _) => {}
 
             // filter out `null` args
             // All literals have been converted to Utf8 or LargeUtf8 in 
type_coercion.
             // Concatenate it with the `contiguous_scalar`.
             Expr::Literal(ScalarValue::Utf8(Some(v)), _) => {
-                contiguous_scalar += &v;
+                contiguous_scalar += v;
             }
             Expr::Literal(ScalarValue::LargeUtf8(Some(v)), _) => {
-                contiguous_scalar += &v;
+                contiguous_scalar += v;
             }
             Expr::Literal(ScalarValue::Utf8View(Some(v)), _) => {
-                contiguous_scalar += &v;
+                contiguous_scalar += v;
             }
 
-            Expr::Literal(x, _) => {
-                return internal_err!(
-                    "The scalar {x} should be casted to string type during the 
type coercion."
-                )
+            Expr::Literal(scalar_val, _) => {
+                // Convert non-string, non-array literals to their string 
representation
+                // Skip array literals - they should be handled at runtime

Review Comment:
   Why do we need this handling code now? Wouldn't coerce_types ensure we have 
the right types?



##########
datafusion/functions/src/string/concat.rs:
##########
@@ -88,37 +145,91 @@ impl ScalarUDFImpl for ConcatFunc {
         &self.signature
     }
 
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        use DataType::*;
+
+        if arg_types.is_empty() {
+            return plan_err!("concat requires at least one argument");
+        }
+
+        let has_arrays = arg_types
+            .iter()
+            .any(|dt| matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_)));
+        let has_non_arrays = arg_types
+            .iter()
+            .any(|dt| !matches!(dt, List(_) | LargeList(_) | FixedSizeList(_, 
_) | Null));
+
+        if has_arrays && has_non_arrays {
+            return plan_err!(
+                "Cannot mix array and non-array arguments in concat function. \
+                Use concat(array1, array2, ...) for arrays or concat(str1, 
str2, ...) for strings, but not both."
+            );
+        }
+
+        if has_arrays {
+            return Ok(arg_types.to_vec());
+        }
+
+        let target_type = self.get_string_type_precedence(arg_types);
+
+        // Only coerce types that need coercion, keep string types as-is
+        let coerced_types = arg_types
+            .iter()
+            .map(|data_type| match data_type {
+                Utf8View | Utf8 | LargeUtf8 => data_type.clone(),
+                _ => target_type.clone(),
+            })
+            .collect();
+        Ok(coerced_types)
+    }
+
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         use DataType::*;
-        let mut dt = &Utf8;
-        arg_types.iter().for_each(|data_type| {
-            if data_type == &Utf8View {
-                dt = data_type;
-            }
-            if data_type == &LargeUtf8 && dt != &Utf8View {
-                dt = data_type;
+
+        // Check if any argument is an array type
+        for data_type in arg_types {
+            if let List(field) | LargeList(field) | FixedSizeList(field, _) = 
data_type {
+                return Ok(List(Arc::new(arrow::datatypes::Field::new(
+                    "item",
+                    field.data_type().clone(),
+                    true,
+                ))));
             }
-        });
+        }
 
-        Ok(dt.to_owned())
+        // For non-array arguments, return string type based on precedence
+        let dt = self.get_string_type_precedence(arg_types);
+        Ok(dt)

Review Comment:
   return_type is called after coercion, so we don't need to repeat this logic 
and can simply check the first argument



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to