This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0062778571 Improve error message for aggregate/window functions (#7265)
0062778571 is described below
commit 00627785718d9d98998021bf44585f32c33af3ea
Author: Yongting You <[email protected]>
AuthorDate: Sat Aug 12 05:07:44 2023 -0700
Improve error message for aggregate/window functions (#7265)
---
.../tests/sqllogictests/test_files/aggregate.slt | 34 ++++++------
.../core/tests/sqllogictests/test_files/errors.slt | 62 +++++++++++++++++++++-
datafusion/expr/src/aggregate_function.rs | 12 ++++-
datafusion/expr/src/built_in_function.rs | 45 ++++++----------
datafusion/expr/src/utils.rs | 31 +++++++++++
datafusion/expr/src/window_function.rs | 12 ++++-
datafusion/optimizer/src/analyzer/type_coercion.rs | 2 +-
7 files changed, 147 insertions(+), 51 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 1780ccab76..e881acf575 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -76,23 +76,23 @@ statement error DataFusion error: Schema error: Schema
contains duplicate unqual
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar))
count_c9_str FROM aggregate_test_100
# csv_query_approx_percentile_cont_with_weight
-statement error Error during planning: The function
ApproxPercentileContWithWeight does not support inputs of type Utf8.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Utf8,
Int8, Float64\)'. You might need to add explicit type casts.
SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100
-statement error Error during planning: The weight argument for
ApproxPercentileContWithWeight does not support inputs of type Utf8
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16,
Utf8, Float64\)'\. You might need to add explicit type casts\.
SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100
-statement error Error during planning: The percentile argument for
ApproxPercentileContWithWeight must be Float64, not Utf8.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'APPROX_PERCENTILE_CONT_WITH_WEIGHT\(Int16,
Int8, Utf8\)'\. You might need to add explicit type casts\.
SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100
# csv_query_approx_percentile_cont_with_histogram_bins
statement error This feature is not implemented: Tdigest max_size value for
'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\).
SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM
aggregate_test_100 GROUP BY 1 ORDER BY 1
-statement error Error during planning: The percentile sample points count for
ApproxPercentileCont must be integer, not Utf8.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64,
Utf8\)'\. You might need to add explicit type casts\.
SELECT approx_percentile_cont(c3, 0.95, c1) FROM aggregate_test_100
-statement error Error during planning: The percentile sample points count for
ApproxPercentileCont must be integer, not Float64.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'APPROX_PERCENTILE_CONT\(Int16, Float64,
Float64\)'\. You might need to add explicit type casts\.
SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100
# array agg can use order by
@@ -1644,10 +1644,10 @@ NULL NULL NULL NULL Row 2 Y
# aggregate_timestamps_sum
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You
might need to add explicit type casts\.
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Timestamp\(Nanosecond, None\)\)'\. You
might need to add explicit type casts\.
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY
tag ORDER BY tag;
# aggregate_timestamps_count
@@ -1688,10 +1688,10 @@ Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432
2021-01-01T05:11:10.432 2021-0
# aggregate_timestamps_avg
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You
might need to add explicit type casts\.
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Timestamp\(Nanosecond, None\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Timestamp\(Nanosecond, None\)\)'\. You
might need to add explicit type casts\.
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY
tag ORDER BY tag;
@@ -1738,10 +1738,10 @@ NULL NULL Row 2 Y
# aggregate_timestamps_sum
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Date32\)'\. You might need to add
explicit type casts\.
SELECT sum(date32), sum(date64) FROM t;
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Date32\)'\. You might need to add
explicit type casts\.
SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag;
# aggregate_timestamps_count
@@ -1782,10 +1782,10 @@ Y 2021-01-01 2021-01-01T00:00:00
# aggregate_timestamps_avg
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Date32\)'\. You might need to add
explicit type casts\.
SELECT avg(date32), avg(date64) FROM t
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Date32\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Date32\)'\. You might need to add
explicit type casts\.
SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag;
@@ -1835,10 +1835,10 @@ select * from t;
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B
# aggregate_times_sum
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Time64\(Nanosecond\).
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might
need to add explicit type casts\.
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t
-statement error DataFusion error: Error during planning: The function Sum does
not support inputs of type Time64\(Nanosecond\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'SUM\(Time64\(Nanosecond\)\)'\. You might
need to add explicit type casts\.
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY
tag ORDER BY tag
# aggregate_times_count
@@ -1880,10 +1880,10 @@ B 21:06:28.247821084 21:06:28.247821 21:06:28.247
21:06:28
# aggregate_times_avg
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Time64\(Nanosecond\).
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might
need to add explicit type casts\.
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
-statement error DataFusion error: Error during planning: The function Avg does
not support inputs of type Time64\(Nanosecond\)\.
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Time64\(Nanosecond\)\)'\. You might
need to add explicit type casts\.
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY
tag ORDER BY tag;
statement ok
diff --git a/datafusion/core/tests/sqllogictests/test_files/errors.slt
b/datafusion/core/tests/sqllogictests/test_files/errors.slt
index 938209d21c..bdbf525abe 100644
--- a/datafusion/core/tests/sqllogictests/test_files/errors.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/errors.slt
@@ -46,7 +46,9 @@ statement error DataFusion error: Arrow error: Cast error:
Cannot cast string 'c
SELECT CAST(c1 AS INT) FROM aggregate_test_100
# aggregation_with_bad_arguments
-statement error Error during planning: The function Count expects at least one
argument
+statement error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: Candidate"\)
+ Candidate functions:
+ COUNT\(Any, \.\., Any\)
SELECT COUNT(DISTINCT) FROM aggregate_test_100
# query_cte_incorrect
@@ -72,3 +74,61 @@ SELECT COUNT(*) FROM
nonexistentcatalog.public.aggregate_test_100
statement error Error during planning: Unsupported compound identifier
'\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too",
quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{
value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style:
None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value:
"prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100",
quote_style: None \}\]'
SELECT COUNT(*) FROM
way.too.many.namespaces.as.ident.prefixes.aggregate_test_100
+
+
+
+#
+# Wrong scalar function signature
+#
+
+# error message for wrong function signature (Variadic: arbitrary number of
args all from some common types)
+statement error Error during planning: No function matches the given name and
argument types 'concat\(\)'. You might need to add explicit type
casts.\n\tCandidate functions:\n\tconcat\(Utf8, ..\)
+SELECT concat();
+
+# error message for wrong function signature (Uniform: t args all from some
common types)
+statement error Error during planning: No function matches the given name and
argument types 'nullif\(Int64\)'. You might need to add explicit type
casts.\n\tCandidate
functions:\n\tnullif\(Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8,
Boolean/UInt8/UInt16/UInt32/UInt64/Int8/Int16/Int32/Int64/Float32/Float64/Utf8/LargeUtf8\)
+SELECT nullif(1);
+
+# error message for wrong function signature (Exact: exact number of args of
an exact type)
+statement error Error during planning: No function matches the given name and
argument types 'pi\(Float64\)'. You might need to add explicit type
casts.\n\tCandidate functions:\n\tpi\(\)
+SELECT pi(3.14);
+
+# error message for wrong function signature (Any: fixed number of args of
arbitrary types)
+statement error Error during planning: No function matches the given name and
argument types 'arrow_typeof\(Int64, Int64\)'. You might need to add explicit
type casts.\n\tCandidate functions:\n\tarrow_typeof\(Any\)
+SELECT arrow_typeof(1, 1);
+
+# error message for wrong function signature (OneOf: fixed number of args of
arbitrary types)
+statement error Error during planning: No function matches the given name and
argument types 'power\(Int64, Int64, Int64\)'. You might need to add explicit
type casts.\n\tCandidate functions:\n\tpower\(Int64, Int64\)\n\tpower\(Float64,
Float64\)
+SELECT power(1, 2, 3);
+
+#
+# Wrong window/aggregate function signature
+#
+
+# AggregateFunction with wrong number of arguments
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'COUNT\(\)'\. You might need to add explicit
type casts\.\n\tCandidate functions:\n\tCOUNT\(Any, \.\., Any\)
+select count();
+
+# AggregateFunction with wrong number of arguments
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'AVG\(Utf8, Float64\)'\. You might need to
add explicit type casts\.\n\tCandidate
functions:\n\tAVG\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select avg(c1, c12) from aggregate_test_100;
+
+# AggregateFunction with wrong argument type
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'REGR_SLOPE\(Int64, Utf8\)'\. You might need
to add explicit type casts\.\n\tCandidate
functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64,
Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select regr_slope(1, '2');
+
+# WindowFunction using AggregateFunction wrong signature
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'REGR_SLOPE\(Float32, Utf8\)'\. You might
need to add explicit type casts\.\n\tCandidate
functions:\n\tREGR_SLOPE\(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64,
Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64\)
+select
+c9,
+regr_slope(c11, '2') over () as min1
+from aggregate_test_100
+order by c9
+
+# WindowFunction with BuiltInWindowFunction wrong signature
+statement error DataFusion error: Error during planning: No function matches
the given name and argument types 'NTH_VALUE\(Int32, Int64, Int64\)'\. You
might need to add explicit type casts\.\n\tCandidate
functions:\n\tNTH_VALUE\(Any, Any\)
+select
+c9,
+nth_value(c5, 2, 3) over (order by c9) as nv1
+from aggregate_test_100
+order by c9
diff --git a/datafusion/expr/src/aggregate_function.rs
b/datafusion/expr/src/aggregate_function.rs
index d6cb5a1903..3debe21800 100644
--- a/datafusion/expr/src/aggregate_function.rs
+++ b/datafusion/expr/src/aggregate_function.rs
@@ -17,6 +17,7 @@
//! Aggregate function module contains all built-in aggregate functions
definitions
+use crate::utils;
use crate::{type_coercion::aggregates::*, Signature, TypeSignature,
Volatility};
use arrow::datatypes::{DataType, Field};
use datafusion_common::{plan_err, DataFusionError, Result};
@@ -231,7 +232,16 @@ impl AggregateFunction {
self,
input_expr_types,
&self.signature(),
- )?;
+ )
+ // original errors are all related to wrong function signature
+ // aggregate them for better error message
+ .map_err(|_| {
+ DataFusionError::Plan(utils::generate_signature_error_msg(
+ &format!("{self}"),
+ self.signature(),
+ input_expr_types,
+ ))
+ })?;
match self {
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 2ad06b873b..6cc7fd5402 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -20,7 +20,8 @@
use crate::nullif::SUPPORTED_NULLIF_TYPES;
use crate::type_coercion::functions::data_types;
use crate::{
- conditional_expressions, struct_expressions, Signature, TypeSignature,
Volatility,
+ conditional_expressions, struct_expressions, utils, Signature,
TypeSignature,
+ Volatility,
};
use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
use datafusion_common::{plan_err, DataFusionError, Result};
@@ -434,33 +435,6 @@ impl BuiltinScalarFunction {
}
}
- /// Creates a detailed error message for a function with wrong signature.
- ///
- /// For example, a query like `select round(3.14, 1.1);` would yield:
- /// ```text
- /// Error during planning: No function matches 'round(Float64, Float64)'.
You might need to add explicit type casts.
- /// Candidate functions:
- /// round(Float64, Int64)
- /// round(Float32, Int64)
- /// round(Float64)
- /// round(Float32)
- /// ```
- fn generate_signature_error_msg(&self, input_expr_types: &[DataType]) ->
String {
- let candidate_signatures = self
- .signature()
- .type_signature
- .to_string_repr()
- .iter()
- .map(|args_str| format!("\t{self}({args_str})"))
- .collect::<Vec<String>>()
- .join("\n");
-
- format!(
- "No function matches the given name and argument types '{}({})'.
You might need to add explicit type casts.\n\tCandidate functions:\n{}",
- self, TypeSignature::join_types(input_expr_types, ", "),
candidate_signatures
- )
- }
-
/// Returns the dimension [`DataType`] of [`DataType::List`] if
/// treated as a N-dimensional array.
///
@@ -493,12 +467,23 @@ impl BuiltinScalarFunction {
// or the execution panics.
if input_expr_types.is_empty() && !self.supports_zero_argument() {
- return plan_err!("{}",
self.generate_signature_error_msg(input_expr_types));
+ return plan_err!(
+ "{}",
+ utils::generate_signature_error_msg(
+ &format!("{self}"),
+ self.signature(),
+ input_expr_types
+ )
+ );
}
// verify that this is a valid set of data types for this function
data_types(input_expr_types, &self.signature()).map_err(|_| {
-
DataFusionError::Plan(self.generate_signature_error_msg(input_expr_types))
+ DataFusionError::Plan(utils::generate_signature_error_msg(
+ &format!("{self}"),
+ self.signature(),
+ input_expr_types,
+ ))
})?;
// the return type of the built in function.
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 76061194ed..bffcd0669c 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -24,6 +24,7 @@ use crate::logical_plan::{
Projection, Repartition, Sort as SortPlan, Subquery, SubqueryAlias, Union,
Unnest,
Values, Window,
};
+use crate::signature::{Signature, TypeSignature};
use crate::{
BinaryExpr, Cast, CreateMemoryTable, CreateView, DdlStatement,
DmlStatement, Expr,
ExprSchemable, GroupingSet, LogicalPlan, LogicalPlanBuilder, Operator,
TableScan,
@@ -1292,6 +1293,36 @@ pub fn find_valid_equijoin_key_pair(
Ok(join_key_pair)
}
+/// Creates a detailed error message for a function with wrong signature.
+///
+/// For example, a query like `select round(3.14, 1.1);` would yield:
+/// ```text
+/// Error during planning: No function matches 'round(Float64, Float64)'. You
might need to add explicit type casts.
+/// Candidate functions:
+/// round(Float64, Int64)
+/// round(Float32, Int64)
+/// round(Float64)
+/// round(Float32)
+/// ```
+pub fn generate_signature_error_msg(
+ func_name: &str,
+ func_signature: Signature,
+ input_expr_types: &[DataType],
+) -> String {
+ let candidate_signatures = func_signature
+ .type_signature
+ .to_string_repr()
+ .iter()
+ .map(|args_str| format!("\t{func_name}({args_str})"))
+ .collect::<Vec<String>>()
+ .join("\n");
+
+ format!(
+ "No function matches the given name and argument types '{}({})'.
You might need to add explicit type casts.\n\tCandidate functions:\n{}",
+ func_name, TypeSignature::join_types(input_expr_types, ", "),
candidate_signatures
+ )
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/datafusion/expr/src/window_function.rs
b/datafusion/expr/src/window_function.rs
index 89c59baa4c..1f36ebdd6b 100644
--- a/datafusion/expr/src/window_function.rs
+++ b/datafusion/expr/src/window_function.rs
@@ -22,6 +22,7 @@
use crate::aggregate_function::AggregateFunction;
use crate::type_coercion::functions::data_types;
+use crate::utils;
use crate::{AggregateUDF, Signature, TypeSignature, Volatility, WindowUDF};
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, DataFusionError, Result};
@@ -187,7 +188,16 @@ impl BuiltInWindowFunction {
// or the execution panics.
// verify that this is a valid set of data types for this function
- data_types(input_expr_types, &self.signature())?;
+ data_types(input_expr_types, &self.signature())
+ // original errors are all related to wrong function signature
+ // aggregate them for better error message
+ .map_err(|_| {
+ DataFusionError::Plan(utils::generate_signature_error_msg(
+ &format!("{self}"),
+ self.signature(),
+ input_expr_types,
+ ))
+ })?;
match self {
BuiltInWindowFunction::RowNumber
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 9d313e8474..f0e5f114b7 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -994,7 +994,7 @@ mod test {
));
let err = Projection::try_new(vec![agg_expr], empty).err().unwrap();
assert_eq!(
- "Plan(\"The function Avg does not support inputs of type Utf8.\")",
+ "Plan(\"No function matches the given name and argument types
'AVG(Utf8)'. You might need to add explicit type casts.\\n\\tCandidate
functions:\\n\\tAVG(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64)\")",
&format!("{err:?}")
);
Ok(())