This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0062778571 Improve error message for aggregate/window functions (#7265)
0062778571 is described below

commit 00627785718d9d98998021bf44585f32c33af3ea
Author: Yongting You <[email protected]>
AuthorDate: Sat Aug 12 05:07:44 2023 -0700

    Improve error message for aggregate/window functions (#7265)
---
 .../tests/sqllogictests/test_files/aggregate.slt   | 34 ++++++------
 .../core/tests/sqllogictests/test_files/errors.slt | 62 +++++++++++++++++++++-
 datafusion/expr/src/aggregate_function.rs          | 12 ++++-
 datafusion/expr/src/built_in_function.rs           | 45 ++++++----------
 datafusion/expr/src/utils.rs                       | 31 +++++++++++
 datafusion/expr/src/window_function.rs             | 12 ++++-
 datafusion/optimizer/src/analyzer/type_coercion.rs |  2 +-
 7 files changed, 147 insertions(+), 51 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt 
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 1780ccab76..e881acf575 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -76,23 +76,23 @@ statement error DataFusion error: Schema error: Schema 
contains duplicate unqual
 SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) 
count_c9_str FROM aggregate_test_100
 
 # csv_query_approx_percentile_cont_with_weight
-statement error Error during planning: The function 
ApproxPercentileContWithWeight does not support inputs of type Utf8.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Utf8, 
Int8, Float64\)'. You might need to add explicit type casts.
 SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100
 
-statement error Error during planning: The weight argument for 
ApproxPercentileContWithWeight does not support inputs of type Utf8
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16, 
Utf8, Float64\)'\. You might need to add explicit type casts\.
 SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100
 
-statement error Error during planning: The percentile argument for 
ApproxPercentileContWithWeight must be Float64, not Utf8.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16, 
Int8, Utf8\)'\. You might need to add explicit type casts\.
 SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100
 
 # csv_query_approx_percentile_cont_with_histogram_bins
 statement error This feature is not implemented: Tdigest max_size value for 
'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\).
 SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM 
aggregate_test_100 GROUP BY 1 ORDER BY 1
 
-statement error Error during planning: The percentile sample points count for 
ApproxPercentileCont must be integer, not Utf8.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64, 
Utf8\)'\. You might need to add explicit type casts\.
 SELECT approx_percentile_cont(c3, 0.95, c1) FROM aggregate_test_100
 
-statement error Error during planning: The percentile sample points count for 
ApproxPercentileCont must be integer, not Float64.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64, 
Float64\)'\. You might need to add explicit type casts\.
 SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100
 
 # array agg can use order by
@@ -1644,10 +1644,10 @@ NULL NULL NULL NULL Row 2 Y
 
 
 # aggregate_timestamps_sum
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You 
might need to add explicit type casts\.
 SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;
 
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You 
might need to add explicit type casts\.
 SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY 
tag ORDER BY tag;
 
 # aggregate_timestamps_count
@@ -1688,10 +1688,10 @@ Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10.432 2021-0
 
 
 # aggregate_timestamps_avg
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You 
might need to add explicit type casts\.
 SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
 
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You 
might need to add explicit type casts\.
 SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY 
tag ORDER BY tag;
 
 
@@ -1738,10 +1738,10 @@ NULL NULL Row 2 Y
 
 
 # aggregate_timestamps_sum
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Date32\)'\. You might need to add 
explicit type casts\.
 SELECT sum(date32), sum(date64) FROM t;
 
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Date32\)'\. You might need to add 
explicit type casts\.
 SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag;
 
 # aggregate_timestamps_count
@@ -1782,10 +1782,10 @@ Y 2021-01-01 2021-01-01T00:00:00
 
 
 # aggregate_timestamps_avg
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Date32\)'\. You might need to add 
explicit type casts\.
 SELECT avg(date32), avg(date64) FROM t
 
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Date32\)'\. You might need to add 
explicit type casts\.
 SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag;
 
 
@@ -1835,10 +1835,10 @@ select * from t;
 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B
 
 # aggregate_times_sum
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Time64\(Nanosecond\).
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might 
need to add explicit type casts\.
 SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t
 
-statement error DataFusion error: Error during planning: The function Sum does 
not support inputs of type Time64\(Nanosecond\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might 
need to add explicit type casts\.
 SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY 
tag ORDER BY tag
 
 # aggregate_times_count
@@ -1880,10 +1880,10 @@ B 21:06:28.247821084 21:06:28.247821 21:06:28.247 
21:06:28
 
 
 # aggregate_times_avg
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Time64\(Nanosecond\).
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might 
need to add explicit type casts\.
 SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
 
-statement error DataFusion error: Error during planning: The function Avg does 
not support inputs of type Time64\(Nanosecond\)\.
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might 
need to add explicit type casts\.
 SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY 
tag ORDER BY tag;
 
 statement ok
diff --git a/datafusion/core/tests/sqllogictests/test_files/errors.slt 
b/datafusion/core/tests/sqllogictests/test_files/errors.slt
index 938209d21c..bdbf525abe 100644
--- a/datafusion/core/tests/sqllogictests/test_files/errors.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/errors.slt
@@ -46,7 +46,9 @@ statement error DataFusion error: Arrow error: Cast error: 
Cannot cast string 'c
 SELECT CAST(c1 AS INT) FROM aggregate_test_100
 
 # aggregation_with_bad_arguments
-statement error Error during planning: The function Count expects at least one 
argument
+statement error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: Candidate"\)
+       Candidate functions:
+       COUNT\(Any, \.\., Any\)
 SELECT COUNT(DISTINCT) FROM aggregate_test_100
 
 # query_cte_incorrect
@@ -72,3 +74,61 @@ SELECT COUNT(*) FROM 
nonexistentcatalog.public.aggregate_test_100
 
 statement error Error during planning: Unsupported compound identifier 
'\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too", 
quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{ 
value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style: 
None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value: 
"prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100", 
quote_style: None \}\]'
 SELECT COUNT(*) FROM 
way.too.many.namespaces.as.ident.prefixes.aggregate_test_100
+
+
+
+#
+# Wrong scalar function signature
+#
+
+# error message for wrong function signature (Variadic: arbitrary number of 
args all from some common types)
+statement error Error during planning: No function matches the given name and 
argument types 'concat\(\)'. You might need to add explicit type 
casts.\n\tCandidate functions:\n\tconcat\(Utf8, ..\)
+SELECT concat();
+
+# error message for wrong function signature (Uniform: t args all from some 
common types)
+statement error Error during planning: No function matches the given name and 
argument types 'nullif\(Int64\)'. You might need to add explicit type 
casts.\n\tCandidate 
functions:\n\tnullif\(Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8,
 
Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8\)
+SELECT nullif(1);
+
+# error message for wrong function signature (Exact: exact number of args of 
an exact type)
+statement error Error during planning: No function matches the given name and 
argument types 'pi\(Float64\)'. You might need to add explicit type 
casts.\n\tCandidate functions:\n\tpi\(\)
+SELECT pi(3.14);
+
+# error message for wrong function signature (Any: fixed number of args of 
arbitrary types)
+statement error Error during planning: No function matches the given name and 
argument types 'arrow_typeof\(Int64, Int64\)'. You might need to add explicit 
type casts.\n\tCandidate functions:\n\tarrow_typeof\(Any\)
+SELECT arrow_typeof(1, 1);
+
+# error message for wrong function signature (OneOf: fixed number of args of 
arbitrary types)
+statement error Error during planning: No function matches the given name and 
argument types 'power\(Int64, Int64, Int64\)'. You might need to add explicit 
type casts.\n\tCandidate functions:\n\tpower\(Int64, Int64\)\n\tpower\(Float64, 
Float64\)
+SELECT power(1, 2, 3);
+
+#
+# Wrong window/aggregate function signature
+#
+
+# AggregateFunction with wrong number of arguments
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'COUNT\(\)'\. You might need to add explicit 
type casts\.\n\tCandidate functions:\n\tCOUNT\(Any, \.\., Any\)
+select count();
+
+# AggregateFunction with wrong number of arguments
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'AVG\(Utf8, Float64\)'\. You might need to 
add explicit type casts\.\n\tCandidate 
functions:\n\tAVG\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select avg(c1, c12) from aggregate_test_100;
+
+# AggregateFunction with wrong argument type
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'REGR_SLOPE\(Int64, Utf8\)'\. You might need 
to add explicit type casts\.\n\tCandidate 
functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64,
 Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select regr_slope(1, '2');
+
+# WindowFunction using AggregateFunction wrong signature
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'REGR_SLOPE\(Float32, Utf8\)'\. You might 
need to add explicit type casts\.\n\tCandidate 
functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64,
 Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select
+c9,
+regr_slope(c11, '2') over () as min1
+from aggregate_test_100
+order by c9
+
+# WindowFunction with BuiltInWindowFunction wrong signature
+statement error DataFusion error: Error during planning: No function matches 
the given name and argument types 'NTH_VALUE\(Int32, Int64, Int64\)'\. You 
might need to add explicit type casts\.\n\tCandidate 
functions:\n\tNTH_VALUE\(Any, Any\)
+select
+c9,
+nth_value(c5, 2, 3) over (order by c9) as nv1
+from aggregate_test_100
+order by c9
diff --git a/datafusion/expr/src/aggregate_function.rs 
b/datafusion/expr/src/aggregate_function.rs
index d6cb5a1903..3debe21800 100644
--- a/datafusion/expr/src/aggregate_function.rs
+++ b/datafusion/expr/src/aggregate_function.rs
@@ -17,6 +17,7 @@
 
 //! Aggregate function module contains all built-in aggregate functions 
definitions
 
+use crate::utils;
 use crate::{type_coercion::aggregates::*, Signature, TypeSignature, 
Volatility};
 use arrow::datatypes::{DataType, Field};
 use datafusion_common::{plan_err, DataFusionError, Result};
@@ -231,7 +232,16 @@ impl AggregateFunction {
             self,
             input_expr_types,
             &self.signature(),
-        )?;
+        )
+        // original errors are all related to wrong function signature
+        // aggregate them for better error message
+        .map_err(|_| {
+            DataFusionError::Plan(utils::generate_signature_error_msg(
+                &format!("{self}"),
+                self.signature(),
+                input_expr_types,
+            ))
+        })?;
 
         match self {
             AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 2ad06b873b..6cc7fd5402 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -20,7 +20,8 @@
 use crate::nullif::SUPPORTED_NULLIF_TYPES;
 use crate::type_coercion::functions::data_types;
 use crate::{
-    conditional_expressions, struct_expressions, Signature, TypeSignature, 
Volatility,
+    conditional_expressions, struct_expressions, utils, Signature, 
TypeSignature,
+    Volatility,
 };
 use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
 use datafusion_common::{plan_err, DataFusionError, Result};
@@ -434,33 +435,6 @@ impl BuiltinScalarFunction {
         }
     }
 
-    /// Creates a detailed error message for a function with wrong signature.
-    ///
-    /// For example, a query like `select round(3.14, 1.1);` would yield:
-    /// ```text
-    /// Error during planning: No function matches 'round(Float64, Float64)'. 
You might need to add explicit type casts.
-    ///     Candidate functions:
-    ///     round(Float64, Int64)
-    ///     round(Float32, Int64)
-    ///     round(Float64)
-    ///     round(Float32)
-    /// ```
-    fn generate_signature_error_msg(&self, input_expr_types: &[DataType]) -> 
String {
-        let candidate_signatures = self
-            .signature()
-            .type_signature
-            .to_string_repr()
-            .iter()
-            .map(|args_str| format!("\t{self}({args_str})"))
-            .collect::<Vec<String>>()
-            .join("\n");
-
-        format!(
-            "No function matches the given name and argument types '{}({})'. 
You might need to add explicit type casts.\n\tCandidate functions:\n{}",
-            self, TypeSignature::join_types(input_expr_types, ", "), 
candidate_signatures
-        )
-    }
-
     /// Returns the dimension [`DataType`] of [`DataType::List`] if
     /// treated as a N-dimensional array.
     ///
@@ -493,12 +467,23 @@ impl BuiltinScalarFunction {
         // or the execution panics.
 
         if input_expr_types.is_empty() && !self.supports_zero_argument() {
-            return plan_err!("{}", 
self.generate_signature_error_msg(input_expr_types));
+            return plan_err!(
+                "{}",
+                utils::generate_signature_error_msg(
+                    &format!("{self}"),
+                    self.signature(),
+                    input_expr_types
+                )
+            );
         }
 
         // verify that this is a valid set of data types for this function
         data_types(input_expr_types, &self.signature()).map_err(|_| {
-            
DataFusionError::Plan(self.generate_signature_error_msg(input_expr_types))
+            DataFusionError::Plan(utils::generate_signature_error_msg(
+                &format!("{self}"),
+                self.signature(),
+                input_expr_types,
+            ))
         })?;
 
         // the return type of the built in function.
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 76061194ed..bffcd0669c 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -24,6 +24,7 @@ use crate::logical_plan::{
     Projection, Repartition, Sort as SortPlan, Subquery, SubqueryAlias, Union, 
Unnest,
     Values, Window,
 };
+use crate::signature::{Signature, TypeSignature};
 use crate::{
     BinaryExpr, Cast, CreateMemoryTable, CreateView, DdlStatement, 
DmlStatement, Expr,
     ExprSchemable, GroupingSet, LogicalPlan, LogicalPlanBuilder, Operator, 
TableScan,
@@ -1292,6 +1293,36 @@ pub fn find_valid_equijoin_key_pair(
     Ok(join_key_pair)
 }
 
+/// Creates a detailed error message for a function with wrong signature.
+///
+/// For example, a query like `select round(3.14, 1.1);` would yield:
+/// ```text
+/// Error during planning: No function matches 'round(Float64, Float64)'. You 
might need to add explicit type casts.
+///     Candidate functions:
+///     round(Float64, Int64)
+///     round(Float32, Int64)
+///     round(Float64)
+///     round(Float32)
+/// ```
+pub fn generate_signature_error_msg(
+    func_name: &str,
+    func_signature: Signature,
+    input_expr_types: &[DataType],
+) -> String {
+    let candidate_signatures = func_signature
+        .type_signature
+        .to_string_repr()
+        .iter()
+        .map(|args_str| format!("\t{func_name}({args_str})"))
+        .collect::<Vec<String>>()
+        .join("\n");
+
+    format!(
+            "No function matches the given name and argument types '{}({})'. 
You might need to add explicit type casts.\n\tCandidate functions:\n{}",
+            func_name, TypeSignature::join_types(input_expr_types, ", "), 
candidate_signatures
+        )
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/expr/src/window_function.rs 
b/datafusion/expr/src/window_function.rs
index 89c59baa4c..1f36ebdd6b 100644
--- a/datafusion/expr/src/window_function.rs
+++ b/datafusion/expr/src/window_function.rs
@@ -22,6 +22,7 @@
 
 use crate::aggregate_function::AggregateFunction;
 use crate::type_coercion::functions::data_types;
+use crate::utils;
 use crate::{AggregateUDF, Signature, TypeSignature, Volatility, WindowUDF};
 use arrow::datatypes::DataType;
 use datafusion_common::{plan_err, DataFusionError, Result};
@@ -187,7 +188,16 @@ impl BuiltInWindowFunction {
         // or the execution panics.
 
         // verify that this is a valid set of data types for this function
-        data_types(input_expr_types, &self.signature())?;
+        data_types(input_expr_types, &self.signature())
+            // original errors are all related to wrong function signature
+            // aggregate them for better error message
+            .map_err(|_| {
+                DataFusionError::Plan(utils::generate_signature_error_msg(
+                    &format!("{self}"),
+                    self.signature(),
+                    input_expr_types,
+                ))
+            })?;
 
         match self {
             BuiltInWindowFunction::RowNumber
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs 
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 9d313e8474..f0e5f114b7 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -994,7 +994,7 @@ mod test {
         ));
         let err = Projection::try_new(vec![agg_expr], empty).err().unwrap();
         assert_eq!(
-            "Plan(\"The function Avg does not support inputs of type Utf8.\")",
+            "Plan(\"No function matches the given name and argument types 
'AVG(Utf8)'. You might need to add explicit type casts.\\n\\tCandidate 
functions:\\n\\tAVG(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64)\")",
             &format!("{err:?}")
         );
         Ok(())

Reply via email to