jayzhan211 commented on code in PR #9108:
URL: https://github.com/apache/arrow-datafusion/pull/9108#discussion_r1501928036


##########
datafusion/expr/src/signature.rs:
##########
@@ -133,25 +136,168 @@ pub enum ArrayFunctionSignature {
     /// The first argument's list dimension should be one dimension less than 
the second argument's list dimension.
     ElementAndArray,
     /// Specialized Signature for Array functions of the form (List/LargeList, 
Index)
+    /// The first argument should be List/LargeList/FixedSizeList, and the 
second argument should be Int64.
     ArrayAndIndex,
     /// Specialized Signature for Array functions of the form (List/LargeList, 
Element, Optional Index)
     ArrayAndElementAndOptionalIndex,
+    /// Specialized Signature for ArrayEmpty and similar functions
+    /// The function takes a single argument that must be a 
List/LargeList/FixedSizeList
+    /// or something that can be coerced to one of those types.
+    Array,
+}
+
+impl ArrayFunctionSignature {
+    /// Arguments to ArrayFunctionSignature
+    /// `current_types` - The data types of the arguments
+    /// Returns the valid types for the function signature
+    pub fn get_type_signature(
+        &self,
+        current_types: &[DataType],
+    ) -> Result<Vec<Vec<DataType>>> {
+        fn array_element_and_optional_index(
+            current_types: &[DataType],
+        ) -> Result<Vec<Vec<DataType>>> {
+            // make sure there's 2 or 3 arguments
+            if !(current_types.len() == 2 || current_types.len() == 3) {
+                return Ok(vec![vec![]]);
+            }
+
+            let first_two_types = &current_types[0..2];
+            let mut valid_types =
+                array_append_or_prepend_valid_types(first_two_types, true)?;
+
+            // Early return if there are only 2 arguments
+            if current_types.len() == 2 {
+                return Ok(valid_types);
+            }
+
+            let valid_types_with_index = valid_types
+                .iter()
+                .map(|t| {
+                    let mut t = t.clone();
+                    t.push(DataType::Int64);
+                    t
+                })
+                .collect::<Vec<_>>();
+
+            valid_types.extend(valid_types_with_index);
+
+            Ok(valid_types)
+        }
+
+        fn array_append_or_prepend_valid_types(
+            current_types: &[DataType],
+            is_append: bool,
+        ) -> Result<Vec<Vec<DataType>>> {
+            if current_types.len() != 2 {
+                return Ok(vec![vec![]]);
+            }
+
+            let (array_type, elem_type) = if is_append {
+                (&current_types[0], &current_types[1])
+            } else {
+                (&current_types[1], &current_types[0])
+            };
+
+            // We follow Postgres on `array_append(Null, T)`, which is not 
valid.
+            if array_type.eq(&DataType::Null) {
+                return Ok(vec![vec![]]);
+            }
+
+            // We need to find the coerced base type, mainly for cases like:
+            // `array_append(List(null), i64)` -> `List(i64)`
+            let array_base_type = 
datafusion_common::utils::base_type(array_type);
+            let elem_base_type = 
datafusion_common::utils::base_type(elem_type);
+            let new_base_type = comparison_coercion(&array_base_type, 
&elem_base_type);
+
+            let new_base_type = new_base_type.ok_or_else(|| {
+                internal_datafusion_err!(
+                    "Coercion from {array_base_type:?} to {elem_base_type:?} 
not supported."
+                )
+            })?;
+
+            let new_array_type =
+                datafusion_common::utils::coerced_type_with_base_type_only(
+                    array_type,
+                    &new_base_type,
+                );
+
+            match new_array_type {
+                DataType::List(ref field)
+                | DataType::LargeList(ref field)
+                | DataType::FixedSizeList(ref field, _) => {
+                    let new_elem_type = field.data_type();
+                    if is_append {
+                        Ok(vec![vec![new_array_type.clone(), 
new_elem_type.clone()]])
+                    } else {
+                        Ok(vec![vec![new_elem_type.to_owned(), 
new_array_type.clone()]])
+                    }
+                }
+                _ => Ok(vec![vec![]]),
+            }
+        }
+        fn array_and_index(current_types: &[DataType]) -> 
Result<Vec<Vec<DataType>>> {

Review Comment:
   ```suggestion
           fn array(array_type: &DataType) -> Option<DataType> {
               match array_type {
                   DataType::List(_)
                   | DataType::LargeList(_)
                   | DataType::FixedSizeList(_, _) => {
                       let array_type = 
coerced_fixed_size_list_to_list(array_type);
                       Some(array_type)
                   }
                   _ => None,
               }
           }
   
           match self {
               ArrayFunctionSignature::ArrayAndElement => {
                   array_append_or_prepend_valid_types(current_types, true)
               }
               ArrayFunctionSignature::ElementAndArray => {
                   array_append_or_prepend_valid_types(current_types, false)
               }
               ArrayFunctionSignature::ArrayAndIndex => {
                   if current_types.len() != 2 {
                       return Ok(vec![vec![]]);
                   }
                   let array_type = array(&current_types[0]);
                   if let Some(array_type) = array_type {
                       Ok(vec![vec![array_type, DataType::Int64]])
                   } else {
                       Ok(vec![])
                   }
               }
               ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
                   array_element_and_optional_index(current_types)
               }
               ArrayFunctionSignature::Array => {
                   if current_types.len() != 1 {
                       return Ok(vec![vec![]]);
                   }
                   let array_type = array(&current_types[0]);
                   if let Some(array_type) = array_type {
                       Ok(vec![vec![array_type]])
                   } else {
                       Ok(vec![])
                   }
               }
           }
   ```



##########
datafusion/expr/src/signature.rs:
##########
@@ -133,25 +136,168 @@ pub enum ArrayFunctionSignature {
     /// The first argument's list dimension should be one dimension less than 
the second argument's list dimension.
     ElementAndArray,
     /// Specialized Signature for Array functions of the form (List/LargeList, 
Index)
+    /// The first argument should be List/LargeList/FixedSizeList, and the 
second argument should be Int64.
     ArrayAndIndex,
     /// Specialized Signature for Array functions of the form (List/LargeList, 
Element, Optional Index)
     ArrayAndElementAndOptionalIndex,
+    /// Specialized Signature for ArrayEmpty and similar functions
+    /// The function takes a single argument that must be a 
List/LargeList/FixedSizeList
+    /// or something that can be coerced to one of those types.
+    Array,
+}
+
+impl ArrayFunctionSignature {
+    /// Arguments to ArrayFunctionSignature
+    /// `current_types` - The data types of the arguments
+    /// Returns the valid types for the function signature
+    pub fn get_type_signature(
+        &self,
+        current_types: &[DataType],
+    ) -> Result<Vec<Vec<DataType>>> {
+        fn array_element_and_optional_index(
+            current_types: &[DataType],
+        ) -> Result<Vec<Vec<DataType>>> {
+            // make sure there's 2 or 3 arguments
+            if !(current_types.len() == 2 || current_types.len() == 3) {
+                return Ok(vec![vec![]]);
+            }
+
+            let first_two_types = &current_types[0..2];
+            let mut valid_types =
+                array_append_or_prepend_valid_types(first_two_types, true)?;
+
+            // Early return if there are only 2 arguments
+            if current_types.len() == 2 {
+                return Ok(valid_types);
+            }
+
+            let valid_types_with_index = valid_types
+                .iter()
+                .map(|t| {
+                    let mut t = t.clone();
+                    t.push(DataType::Int64);
+                    t
+                })
+                .collect::<Vec<_>>();
+
+            valid_types.extend(valid_types_with_index);
+
+            Ok(valid_types)
+        }
+
+        fn array_append_or_prepend_valid_types(
+            current_types: &[DataType],
+            is_append: bool,
+        ) -> Result<Vec<Vec<DataType>>> {
+            if current_types.len() != 2 {
+                return Ok(vec![vec![]]);
+            }
+
+            let (array_type, elem_type) = if is_append {
+                (&current_types[0], &current_types[1])
+            } else {
+                (&current_types[1], &current_types[0])
+            };
+
+            // We follow Postgres on `array_append(Null, T)`, which is not 
valid.
+            if array_type.eq(&DataType::Null) {
+                return Ok(vec![vec![]]);
+            }
+
+            // We need to find the coerced base type, mainly for cases like:
+            // `array_append(List(null), i64)` -> `List(i64)`
+            let array_base_type = 
datafusion_common::utils::base_type(array_type);
+            let elem_base_type = 
datafusion_common::utils::base_type(elem_type);
+            let new_base_type = comparison_coercion(&array_base_type, 
&elem_base_type);
+
+            let new_base_type = new_base_type.ok_or_else(|| {
+                internal_datafusion_err!(
+                    "Coercion from {array_base_type:?} to {elem_base_type:?} 
not supported."
+                )
+            })?;
+
+            let new_array_type =
+                datafusion_common::utils::coerced_type_with_base_type_only(
+                    array_type,
+                    &new_base_type,
+                );
+
+            match new_array_type {
+                DataType::List(ref field)
+                | DataType::LargeList(ref field)
+                | DataType::FixedSizeList(ref field, _) => {
+                    let new_elem_type = field.data_type();
+                    if is_append {
+                        Ok(vec![vec![new_array_type.clone(), 
new_elem_type.clone()]])
+                    } else {
+                        Ok(vec![vec![new_elem_type.to_owned(), 
new_array_type.clone()]])
+                    }
+                }
+                _ => Ok(vec![vec![]]),
+            }
+        }
+        fn array_and_index(current_types: &[DataType]) -> 
Result<Vec<Vec<DataType>>> {

Review Comment:
   I would prefer that `array` return the data type directly (as an 
`Option<DataType>`) instead of the long but unnecessary 
`Result<Vec<Vec<DataType>>>`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to