This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new c96b9710f50 [SPARK-44059] Add analyzer support of named arguments for built-in functions c96b9710f50 is described below commit c96b9710f50ed045d8e949db6c0e251bbd57b201 Author: Richard Yu <richard...@databricks.com> AuthorDate: Tue Jul 18 08:57:55 2023 +0800 [SPARK-44059] Add analyzer support of named arguments for built-in functions ### What changes were proposed in this pull request? Add analyzer support for named function arguments. ### Why are the changes needed? Part of the project needed for general named function argument support. ### Does this PR introduce _any_ user-facing change? We added support for named arguments for the ```CountMinSketchAgg``` and ```Mask``` SQL functions. ### How was this patch tested? A new suite was added for this test called NamedArgumentFunctionSuite. Closes #42020 from learningchess2003/44059-final. Authored-by: Richard Yu <richard...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 228b5dbfd7688a8efa7135d9ec7b00b71e41a38a) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- common/utils/src/main/resources/error/README.md | 1 + .../src/main/resources/error/error-classes.json | 44 ++- ...ate-routine-parameter-assignment-error-class.md | 36 +++ docs/sql-error-conditions.md | 34 ++- .../sql/catalyst/analysis/FunctionRegistry.scala | 113 ++++++- .../expressions/aggregate/CountMinSketchAgg.scala | 49 +-- .../sql/catalyst/expressions/generators.scala | 79 ++++- .../sql/catalyst/expressions/maskExpressions.scala | 31 +- .../plans/logical/FunctionBuilderBase.scala | 177 +++++++++++ .../spark/sql/errors/QueryCompilationErrors.scala | 76 ++++- .../analysis/NamedParameterFunctionSuite.scala | 151 +++++++++ .../named-function-arguments.sql.out | 337 ++++++++++++++++++--- .../sql-tests/inputs/named-function-arguments.sql | 55 ++++ .../results/named-function-arguments.sql.out | 324 +++++++++++++++++--- 
.../spark/sql/errors/QueryParsingErrorsSuite.scala | 9 +- 15 files changed, 1386 insertions(+), 130 deletions(-) diff --git a/common/utils/src/main/resources/error/README.md b/common/utils/src/main/resources/error/README.md index dfcb42d49e7..aed2c0becd3 100644 --- a/common/utils/src/main/resources/error/README.md +++ b/common/utils/src/main/resources/error/README.md @@ -666,6 +666,7 @@ The following SQLSTATEs are collated from: |4274C |42 |Syntax Error or Access Rule Violation |74C |The specified attribute was not found in the trusted context.|DB2 |N |DB2 | |4274D |42 |Syntax Error or Access Rule Violation |74D |The specified attribute already exists in the trusted context.|DB2 |N |DB2 | |4274E |42 |Syntax Error or Access Rule Violation |74E |The specified attribute is not supported in the trusted context.|DB2 |N |DB2 | +|4274K |42 |Syntax Error or Access Rule Violation |74K |Invalid use of a named argument when invoking a routine.|DB2 |N |DB2 | |4274M |42 |Syntax Error or Access Rule Violation |74M |An undefined period name was detected. |DB2 |N |DB2 | |42801 |42 |Syntax Error or Access Rule Violation |801 |Isolation level UR is invalid, because the result table is not read-only.|DB2 |N |DB2 | |42802 |42 |Syntax Error or Access Rule Violation |802 |The number of target values is not the same as the number of source values.|DB2 |N |DB2 | diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index e8cdaa6c63b..b136878e6d2 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -738,6 +738,24 @@ ], "sqlState" : "23505" }, + "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT" : { + "message" : [ + "Call to function <functionName> is invalid because it includes multiple argument assignments to the same parameter name <parameterName>." 
+ ], + "subClass" : { + "BOTH_POSITIONAL_AND_NAMED" : { + "message" : [ + "A positional argument and named argument both referred to the same parameter." + ] + }, + "DOUBLE_NAMED_ARGUMENT_REFERENCE" : { + "message" : [ + "More than one named argument referred to the same parameter." + ] + } + }, + "sqlState" : "4274K" + }, "EMPTY_JSON_FIELD_VALUE" : { "message" : [ "Failed to parse an empty string for data type <dataType>." @@ -1956,7 +1974,13 @@ "Not allowed to implement multiple UDF interfaces, UDF class <className>." ] }, - "NAMED_ARGUMENTS_SUPPORT_DISABLED" : { + "NAMED_PARAMETERS_NOT_SUPPORTED" : { + "message" : [ + "Named parameters are not supported for function <functionName>; please retry the query with positional arguments to the function call instead." + ], + "sqlState" : "4274K" + }, + "NAMED_PARAMETER_SUPPORT_DISABLED" : { "message" : [ "Cannot call function <functionName> because named argument references are not enabled here. In this case, the named argument reference was <argument>. Set \"spark.sql.allowNamedFunctionArguments\" to \"true\" to turn on feature." ] @@ -2295,6 +2319,12 @@ ], "sqlState" : "42614" }, + "REQUIRED_PARAMETER_NOT_FOUND" : { + "message" : [ + "Cannot invoke function <functionName> because the parameter named <parameterName> is required, but the function call did not supply a value. Please update the function call to supply an argument value (either positionally or by name) and retry the query again." + ], + "sqlState" : "4274K" + }, "REQUIRES_SINGLE_PART_NAMESPACE" : { "message" : [ "<sessionCatalog> requires a single-part namespace, but got <namespace>." @@ -2485,6 +2515,12 @@ ], "sqlState" : "42K09" }, + "UNEXPECTED_POSITIONAL_ARGUMENT" : { + "message" : [ + "Cannot invoke function <functionName> because it contains positional argument(s) following named argument(s); please rearrange them so the positional arguments come first and then retry the query again." 
+ ], + "sqlState" : "4274K" + }, "UNKNOWN_PROTOBUF_MESSAGE_TYPE" : { "message" : [ "Attempting to treat <descriptorName> as a Message, but it was <containingType>." @@ -2514,6 +2550,12 @@ ], "sqlState" : "428C4" }, + "UNRECOGNIZED_PARAMETER_NAME" : { + "message" : [ + "Cannot invoke function <functionName> because the function call included a named argument reference for the argument named <argumentName>, but this function does not include any signature containing an argument with this name. Did you mean one of the following? [<proposal>]." + ], + "sqlState" : "4274K" + }, "UNRECOGNIZED_SQL_TYPE" : { "message" : [ "Unrecognized SQL type - name: <typeName>, id: <jdbcType>." diff --git a/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md b/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md new file mode 100644 index 00000000000..d9f14b5a55e --- /dev/null +++ b/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md @@ -0,0 +1,36 @@ +--- +layout: global +title: DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT error class +displayTitle: DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT error class +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--- + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Call to function `<functionName>` is invalid because it includes multiple argument assignments to the same parameter name `<parameterName>`. + +This error class has the following derived error classes: + +## BOTH_POSITIONAL_AND_NAMED + +A positional argument and named argument both referred to the same parameter. + +## DOUBLE_NAMED_ARGUMENT_REFERENCE + +More than one named argument referred to the same parameter. + + diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 91b77a6452b..5686324a055 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -456,6 +456,14 @@ Found duplicate clauses: `<clauseName>`. Please, remove one of them. Found duplicate keys `<keyColumn>`. +### [DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT](sql-error-conditions-duplicate-routine-parameter-assignment-error-class.html) + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Call to function `<functionName>` is invalid because it includes multiple argument assignments to the same parameter name `<parameterName>`. + +For more details see [DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT](sql-error-conditions-duplicate-routine-parameter-assignment-error-class.html) + ### EMPTY_JSON_FIELD_VALUE [SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) @@ -1210,7 +1218,13 @@ SQLSTATE: none assigned Not allowed to implement multiple UDF interfaces, UDF class `<className>`. -### NAMED_ARGUMENTS_SUPPORT_DISABLED +### NAMED_PARAMETERS_NOT_SUPPORTED + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Named parameters are not supported for function `<functionName>`; please retry the query with positional arguments to the function call instead. 
+ +### NAMED_PARAMETER_SUPPORT_DISABLED SQLSTATE: none assigned @@ -1521,6 +1535,12 @@ Failed to rename as `<sourcePath>` was not found. The `<clause>` clause may be used at most once per `<operation>` operation. +### REQUIRED_PARAMETER_NOT_FOUND + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Cannot invoke function `<functionName>` because the parameter named `<parameterName>` is required, but the function call did not supply a value. Please update the function call to supply an argument value (either positionally or by name) and retry the query again. + ### REQUIRES_SINGLE_PART_NAMESPACE [SQLSTATE: 42K05](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) @@ -1724,6 +1744,12 @@ Found an unclosed bracketed comment. Please, append */ at the end of the comment Parameter `<paramIndex>` of function `<functionName>` requires the `<requiredType>` type, however `<inputSql>` has the type `<inputType>`. +### UNEXPECTED_POSITIONAL_ARGUMENT + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Cannot invoke function `<functionName>` because it contains positional argument(s) following named argument(s); please rearrange them so the positional arguments come first and then retry the query again. + ### UNKNOWN_PROTOBUF_MESSAGE_TYPE SQLSTATE: none assigned @@ -1754,6 +1780,12 @@ Unpivot value columns must share a least common type, some types do not: [`<type All unpivot value columns must have the same size as there are value column names (`<names>`). +### UNRECOGNIZED_PARAMETER_NAME + +[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Cannot invoke function `<functionName>` because the function call included a named argument reference for the argument named `<argumentName>`, but this function does not include any signature containing an argument with this name. 
Did you mean one of the following? [`<proposal>`]. + ### UNRECOGNIZED_SQL_TYPE [SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index a9bda2e0b7c..558579cdb80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.xml._ -import org.apache.spark.sql.catalyst.plans.logical.{Generate, LogicalPlan, OneRowRelation, Range} +import org.apache.spark.sql.catalyst.plans.logical.{FunctionBuilderBase, Generate, LogicalPlan, OneRowRelation, Range} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ @@ -358,8 +358,8 @@ object FunctionRegistry { // misc non-aggregate functions expression[Abs]("abs"), expression[Coalesce]("coalesce"), - expression[Explode]("explode"), - expressionGeneratorOuter[Explode]("explode_outer"), + expressionBuilder("explode", ExplodeExpressionBuilder), + expressionGeneratorBuilderOuter("explode_outer", ExplodeExpressionBuilder), expression[Greatest]("greatest"), expression[If]("if"), expression[Inline]("inline"), @@ -491,7 +491,7 @@ object FunctionRegistry { expression[CollectList]("collect_list"), expression[CollectList]("array_agg", true, Some("3.3.0")), expression[CollectSet]("collect_set"), - expression[CountMinSketchAgg]("count_min_sketch"), + expressionBuilder("count_min_sketch", CountMinSketchAggExpressionBuilder), 
expression[BoolAnd]("every", true), expression[BoolAnd]("bool_and"), expression[BoolOr]("any", true), @@ -823,7 +823,7 @@ object FunctionRegistry { castAlias("string", StringType), // mask functions - expression[Mask]("mask"), + expressionBuilder("mask", MaskExpressionBuilder), // csv expression[CsvToStructs]("from_csv"), @@ -887,6 +887,9 @@ object FunctionRegistry { since: Option[String] = None): (String, (ExpressionInfo, FunctionBuilder)) = { val (expressionInfo, builder) = FunctionRegistryBase.build[T](name, since) val newBuilder = (expressions: Seq[Expression]) => { + if (expressions.exists(_.isInstanceOf[NamedArgumentExpression])) { + throw QueryCompilationErrors.namedArgumentsNotSupported(name) + } val expr = builder(expressions) if (setAlias) expr.setTagValue(FUNC_ALIAS, name) expr @@ -894,6 +897,32 @@ object FunctionRegistry { (name, (expressionInfo, newBuilder)) } + /** + * This method will be used to rearrange the arguments provided in function invocation + * in the order defined by the function signature given in the builder instance. 
+ * + * @param name The name of the function + * @param builder The builder of the function expression + * @param expressions The argument list passed in function invocation + * @tparam T The class of the builder + * @return An argument list in positional order defined by the builder + */ + def rearrangeExpressions[T <: FunctionBuilderBase[_]]( + name: String, + builder: T, + expressions: Seq[Expression]) : Seq[Expression] = { + val rearrangedExpressions = if (!builder.functionSignature.isEmpty) { + val functionSignature = builder.functionSignature.get + builder.rearrange(functionSignature, expressions, name) + } else { + expressions + } + if (rearrangedExpressions.exists(_.isInstanceOf[NamedArgumentExpression])) { + throw QueryCompilationErrors.namedArgumentsNotSupported(name) + } + rearrangedExpressions + } + private def expressionBuilder[T <: ExpressionBuilder : ClassTag]( name: String, builder: T, @@ -902,7 +931,8 @@ object FunctionRegistry { val info = FunctionRegistryBase.expressionInfo[T](name, since) val funcBuilder = (expressions: Seq[Expression]) => { assert(expressions.forall(_.resolved), "function arguments must be resolved.") - val expr = builder.build(name, expressions) + val rearrangedExpressions = rearrangeExpressions(name, builder, expressions) + val expr = builder.build(name, rearrangedExpressions) if (setAlias) expr.setTagValue(FUNC_ALIAS, name) expr } @@ -935,9 +965,22 @@ object FunctionRegistry { private def expressionGeneratorOuter[T <: Generator : ClassTag](name: String) : (String, (ExpressionInfo, FunctionBuilder)) = { - val (_, (info, generatorBuilder)) = expression[T](name) + val (_, (info, builder)) = expression[T](name) val outerBuilder = (args: Seq[Expression]) => { - GeneratorOuter(generatorBuilder(args).asInstanceOf[Generator]) + GeneratorOuter(builder(args).asInstanceOf[Generator]) + } + (name, (info, outerBuilder)) + } + + private def expressionGeneratorBuilderOuter[T <: ExpressionBuilder : ClassTag] + (name: String, builder: T) : 
(String, (ExpressionInfo, FunctionBuilder)) = { + val info = FunctionRegistryBase.expressionInfo[T](name, since = None) + val outerBuilder = (args: Seq[Expression]) => { + val rearrangedArgs = + FunctionRegistry.rearrangeExpressions(name, builder, args) + val generator = builder.build(name, rearrangedArgs) + assert(generator.isInstanceOf[Generator]) + GeneratorOuter(generator.asInstanceOf[Generator]) } (name, (info, outerBuilder)) } @@ -980,6 +1023,30 @@ object TableFunctionRegistry { (name, (info, (expressions: Seq[Expression]) => builder(expressions))) } + /** + * A function used for table-valued functions to return a builder that + * when given input arguments, will return a function expression representing + * the table-valued functions. + * + * @param name Name of the function + * @param builder Object which will build the expression given input arguments + * @param since Time of implementation + * @tparam T Type of the builder + * @return A tuple of the function name, expression info, and function builder + */ + def generatorBuilder[T <: GeneratorBuilder : ClassTag]( + name: String, + builder: T, + since: Option[String] = None): (String, (ExpressionInfo, TableFunctionBuilder)) = { + val info = FunctionRegistryBase.expressionInfo[T](name, since) + val funcBuilder = (expressions: Seq[Expression]) => { + assert(expressions.forall(_.resolved), "function arguments must be resolved.") + val rearrangedExpressions = FunctionRegistry.rearrangeExpressions(name, builder, expressions) + builder.build(name, rearrangedExpressions) + } + (name, (info, funcBuilder)) + } + def generator[T <: Generator : ClassTag](name: String, outer: Boolean = false) : (String, (ExpressionInfo, TableFunctionBuilder)) = { val (info, builder) = FunctionRegistryBase.build[T](name, since = None) @@ -999,8 +1066,8 @@ object TableFunctionRegistry { val logicalPlans: Map[String, (ExpressionInfo, TableFunctionBuilder)] = Map( logicalPlan[Range]("range"), - generator[Explode]("explode"), - 
generator[Explode]("explode_outer", outer = true), + generatorBuilder("explode", ExplodeGeneratorBuilder), + generatorBuilder("explode_outer", ExplodeOuterGeneratorBuilder), generator[Inline]("inline"), generator[Inline]("inline_outer", outer = true), generator[JsonTuple]("json_tuple"), @@ -1022,6 +1089,28 @@ object TableFunctionRegistry { val functionSet: Set[FunctionIdentifier] = builtin.listFunction().toSet } -trait ExpressionBuilder { - def build(funcName: String, expressions: Seq[Expression]): Expression +/** + * This is a trait used for scalar valued functions that defines how their expression + * representations are constructed in [[FunctionRegistry]]. + */ +trait ExpressionBuilder extends FunctionBuilderBase[Expression] + +/** + * This is a trait used for table valued functions that defines how their expression + * representations are constructed in [[TableFunctionRegistry]]. + */ +trait GeneratorBuilder extends FunctionBuilderBase[LogicalPlan] { + override final def build(funcName: String, expressions: Seq[Expression]) : LogicalPlan = { + Generate( + buildGenerator(funcName, expressions), + unrequiredChildIndex = Nil, + outer = isOuter, + qualifier = None, + generatorOutput = Nil, + child = OneRowRelation()) + } + + def isOuter: Boolean + + def buildGenerator(funcName: String, expressions: Seq[Expression]) : Generator } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala index 6cefca418ce..b7988922bd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow -import 
org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter} import org.apache.spark.sql.catalyst.trees.QuaternaryLike import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.types._ @@ -39,22 +40,6 @@ import org.apache.spark.util.sketch.CountMinSketch * @param confidenceExpression confidence, must be positive and less than 1.0 * @param seedExpression random seed */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = """ - _FUNC_(col, eps, confidence, seed) - Returns a count-min sketch of a column with the given esp, - confidence and seed. The result is an array of bytes, which can be deserialized to a - `CountMinSketch` before usage. Count-min sketch is a probabilistic data structure used for - cardinality estimation using sub-linear space. - """, - examples = """ - Examples: - > SELECT hex(_FUNC_(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col); - 0000000100000000000000030000000100000004000000005D8D6AB90000000000000000000000000000000200000000000000010000000000000000 - """, - group = "agg_funcs", - since = "2.2.0") -// scalastyle:on line.size.limit case class CountMinSketchAgg( child: Expression, epsExpression: Expression, @@ -208,3 +193,33 @@ case class CountMinSketchAgg( confidenceExpression = third, seedExpression = fourth) } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(col, eps, confidence, seed) - Returns a count-min sketch of a column with the given esp, + confidence and seed. The result is an array of bytes, which can be deserialized to a + `CountMinSketch` before usage. 
Count-min sketch is a probabilistic data structure used for + cardinality estimation using sub-linear space. + """, + examples = """ + Examples: + > SELECT hex(_FUNC_(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col); + 0000000100000000000000030000000100000004000000005D8D6AB90000000000000000000000000000000200000000000000010000000000000000 + """, + group = "agg_funcs", + since = "2.2.0") +// scalastyle:on line.size.limit +object CountMinSketchAggExpressionBuilder extends ExpressionBuilder { + final val defaultFunctionSignature = FunctionSignature(Seq( + InputParameter("column"), + InputParameter("epsilon"), + InputParameter("confidence"), + InputParameter("seed") + )) + override def functionSignature: Option[FunctionSignature] = Some(defaultFunctionSignature) + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + assert(expressions.size == 4) + new CountMinSketchAgg(expressions(0), expressions(1), expressions(2), expressions(3)) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 6ae7ea206c8..afaaf07d272 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -21,11 +21,12 @@ import scala.collection.mutable import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, GeneratorBuilder, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import 
org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter} import org.apache.spark.sql.catalyst.trees.TreePattern.{GENERATOR, TreePattern} import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.catalyst.util.SQLKeywordUtils._ @@ -413,6 +414,21 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with * 20 * }}} */ +case class Explode(child: Expression) extends ExplodeBase { + override val position: Boolean = false + override protected def withNewChildInternal(newChild: Expression): Explode = + copy(child = newChild) +} + +trait ExplodeGeneratorBuilderBase extends GeneratorBuilder { + override def functionSignature: Option[FunctionSignature] = + Some(FunctionSignature(Seq(InputParameter("collection")))) + override def buildGenerator(funcName: String, expressions: Seq[Expression]): Generator = { + assert(expressions.size == 1) + Explode(expressions(0)) + } +} + // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. 
Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.", @@ -421,16 +437,66 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with > SELECT _FUNC_(array(10, 20)); 10 20 + > SELECT _FUNC_(collection => array(10, 20)); + 10 + 20 + > SELECT * FROM _FUNC_(collection => array(10, 20)); + 10 + 20 """, since = "1.0.0", group = "generator_funcs") // scalastyle:on line.size.limit -case class Explode(child: Expression) extends ExplodeBase { - override val position: Boolean = false - override protected def withNewChildInternal(newChild: Expression): Explode = - copy(child = newChild) +object ExplodeExpressionBuilder extends ExpressionBuilder { + override def functionSignature: Option[FunctionSignature] = + Some(FunctionSignature(Seq(InputParameter("collection")))) + + override def build(funcName: String, expressions: Seq[Expression]) : Expression = + Explode(expressions(0)) } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.", + examples = """ + Examples: + > SELECT _FUNC_(array(10, 20)); + 10 + 20 + > SELECT _FUNC_(collection => array(10, 20)); + 10 + 20 + > SELECT * FROM _FUNC_(collection => array(10, 20)); + 10 + 20 + """, + since = "1.0.0", + group = "generator_funcs") +// scalastyle:on line.size.limit +object ExplodeGeneratorBuilder extends ExplodeGeneratorBuilderBase { + override def isOuter: Boolean = false +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. 
Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.", + examples = """ + Examples: + > SELECT _FUNC_(array(10, 20)); + 10 + 20 + > SELECT _FUNC_(collection => array(10, 20)); + 10 + 20 + """, + since = "1.0.0", + group = "generator_funcs") +// scalastyle:on line.size.limit +object ExplodeOuterGeneratorBuilder extends ExplodeGeneratorBuilderBase { + override def isOuter: Boolean = true +} + + /** * Given an input array produces a sequence of rows for each position and value in the array. * @@ -448,6 +514,9 @@ case class Explode(child: Expression) extends ExplodeBase { > SELECT _FUNC_(array(10,20)); 0 10 1 20 + > SELECT * FROM _FUNC_(array(10,20)); + 0 10 + 1 20 """, since = "2.0.0", group = "generator_funcs") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index af74e7c0f7b..61a96ff5ff9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -18,10 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter} import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -75,6 +76,26 @@ import 
org.apache.spark.unsafe.types.UTF8String since = "3.4.0", group = "string_funcs") // scalastyle:on line.size.limit +object MaskExpressionBuilder extends ExpressionBuilder { + override def functionSignature: Option[FunctionSignature] = { + val strArg = InputParameter("str") + val upperCharArg = InputParameter("upperChar", Some(Literal(Mask.MASKED_UPPERCASE))) + val lowerCharArg = InputParameter("lowerChar", Some(Literal(Mask.MASKED_LOWERCASE))) + val digitCharArg = InputParameter("digitChar", Some(Literal(Mask.MASKED_DIGIT))) + val otherCharArg = InputParameter( + "otherChar", + Some(Literal(Mask.MASKED_IGNORE, StringType))) + val functionSignature: FunctionSignature = FunctionSignature(Seq( + strArg, upperCharArg, lowerCharArg, digitCharArg, otherCharArg)) + Some(functionSignature) + } + + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + assert(expressions.size == 5) + new Mask(expressions(0), expressions(1), expressions(2), expressions(3), expressions(4)) + } +} + case class Mask( input: Expression, upperChar: Expression, @@ -277,13 +298,13 @@ case class MaskArgument(maskChar: Char, ignore: Boolean) object Mask { // Default character to replace upper-case characters - private val MASKED_UPPERCASE = 'X' + val MASKED_UPPERCASE = 'X' // Default character to replace lower-case characters - private val MASKED_LOWERCASE = 'x' + val MASKED_LOWERCASE = 'x' // Default character to replace digits - private val MASKED_DIGIT = 'n' + val MASKED_DIGIT = 'n' // This value helps to retain original value in the input by ignoring the replacement rules - private val MASKED_IGNORE = null + val MASKED_IGNORE = null def transformInput( input: Any, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala new file mode 100644 index 00000000000..4a2b9eae981 --- /dev/null +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{Expression, NamedArgumentExpression} +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * This is a base trait that is used for implementing builder classes that can be used to construct + * expressions or logical plans depending on if it is a table-valued or scalar-valued function. + * + * Two classes of builders currently exist for this trait: [[GeneratorBuilder]] and + * [[ExpressionBuilder]]. If a new class of functions are to be added, a new trait should also be + * created which extends this trait. + * + * @tparam T The type that is expected to be returned by the [[FunctionBuilderBase.build]] function + */ +trait FunctionBuilderBase[T] { + /** + * A method that returns the method signature for this function. + * Each function signature includes a list of parameters to which the analyzer can + * compare a function call with provided arguments to determine if that function + * call is a match for the function signature. 
+ * + * IMPORTANT: For now, each function expression builder should have only one function signature. + * Also, for any function signature, required arguments must always come before optional ones. + */ + def functionSignature: Option[FunctionSignature] = None + + /** + * This function rearranges the arguments provided during function invocation in positional order + * according to the function signature. This method will fill in the default values if optional + * parameters do not have their values specified. Any function which supports named arguments + * will have this routine invoked, even if no named arguments are present in the argument list. + * This is done to eliminate constructor overloads in some methods which use them for default + * values prior to the implementation of the named argument framework. This function will also + * check if the number of arguments is correct. If that is not the case, then an error will be + * thrown. + * + * IMPORTANT: This method will be called before the [[FunctionBuilderBase.build]] method is + * invoked. It is guaranteed that the expressions provided to the [[FunctionBuilderBase.build]] + * function form a valid set of argument expressions that can be used in the construction of + * the function expression.
+ * + * @param expectedSignature The method signature which we rearrange our arguments according to + * @param providedArguments The list of arguments passed from function invocation + * @param functionName The name of the function + * @return The rearranged argument list with arguments in positional order + */ + def rearrange( + expectedSignature: FunctionSignature, + providedArguments: Seq[Expression], + functionName: String) : Seq[Expression] = { + NamedParametersSupport.defaultRearrange(expectedSignature, providedArguments, functionName) + } + + def build(funcName: String, expressions: Seq[Expression]): T +} + +object NamedParametersSupport { + /** + * This method is the default routine which rearranges the arguments in positional order according + * to the function signature provided. This will also fill in any default values that exist for + * optional arguments. This method will also be invoked even if there are no named arguments in + * the argument list. This method will keep all positional arguments in their original order. + * + * @param functionSignature The function signature that defines the positional ordering + * @param args The argument list provided in function invocation + * @param functionName The name of the function + * @return A list of arguments rearranged in positional order defined by the provided signature + */ + final def defaultRearrange( + functionSignature: FunctionSignature, + args: Seq[Expression], + functionName: String): Seq[Expression] = { + val parameters: Seq[InputParameter] = functionSignature.parameters + if (parameters.dropWhile(_.default.isEmpty).exists(_.default.isEmpty)) { + throw QueryCompilationErrors.unexpectedRequiredParameterInFunctionSignature( + functionName, functionSignature) + } + + val (positionalArgs, namedArgs) = args.span(!_.isInstanceOf[NamedArgumentExpression]) + val namedParameters: Seq[InputParameter] = parameters.drop(positionalArgs.size) + + // The following loop checks for the following: + // 1.
Unrecognized parameter names + // 2. Duplicate routine parameter assignments + val allParameterNames: Seq[String] = parameters.map(_.name) + val parameterNamesSet: Set[String] = allParameterNames.toSet + val positionalParametersSet = allParameterNames.take(positionalArgs.size).toSet + val namedParametersSet = collection.mutable.Set[String]() + + for (arg <- namedArgs) { + arg match { + case namedArg: NamedArgumentExpression => + val parameterName = namedArg.key + if (!parameterNamesSet.contains(parameterName)) { + throw QueryCompilationErrors.unrecognizedParameterName(functionName, namedArg.key, + parameterNamesSet.toSeq) + } + if (positionalParametersSet.contains(parameterName)) { + throw QueryCompilationErrors.positionalAndNamedArgumentDoubleReference( + functionName, namedArg.key) + } + if (namedParametersSet.contains(parameterName)) { + throw QueryCompilationErrors.doubleNamedArgumentReference( + functionName, namedArg.key) + } + namedParametersSet.add(namedArg.key) + case _ => + throw QueryCompilationErrors.unexpectedPositionalArgument(functionName) + } + } + + // Check the argument list size against the provided parameter list length. + if (parameters.size < args.length) { + val validParameterSizes = + Array.range(parameters.count(_.default.isEmpty), parameters.size + 1).toSeq + throw QueryCompilationErrors.wrongNumArgsError( + functionName, validParameterSizes, args.length) + } + + // This constructs a map from argument name to value for argument rearrangement. + val namedArgMap = namedArgs.map { arg => + val namedArg = arg.asInstanceOf[NamedArgumentExpression] + namedArg.key -> namedArg.value + }.toMap + + // We rearrange named arguments to match their positional order. 
+ val rearrangedNamedArgs: Seq[Expression] = namedParameters.map { param => + namedArgMap.getOrElse( + param.name, + if (param.default.isEmpty) { + throw QueryCompilationErrors.requiredParameterNotFound(functionName, param.name) + } else { + param.default.get + } + ) + } + val rearrangedArgs = positionalArgs ++ rearrangedNamedArgs + assert(rearrangedArgs.size == parameters.size) + rearrangedArgs + } +} + +/** + * Represents a parameter of a function expression. Function expressions should use this class + * to construct the parameter lists returned in [[FunctionBuilderBase.functionSignature]] + * + * @param name The name of the parameter. + * @param default The default value of the argument. If the default is none, then that means the + * argument is required. If no argument is provided, an exception is thrown. + */ +case class InputParameter(name: String, default: Option[Expression] = None) + +/** + * Represents a method signature and the list of arguments it receives as input. + * Currently, overloads are not supported and only one FunctionSignature is allowed + * per function expression.
+ * + * @param parameters The list of arguments which the function takes + */ +case class FunctionSignature(parameters: Seq[InputParameter]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 955046e74e1..346f25580aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, CreateMap, CreateStruct, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Assignment, Join, LogicalPlan, SerdeInfo, Window} +import org.apache.spark.sql.catalyst.plans.logical.{Assignment, FunctionSignature, Join, LogicalPlan, SerdeInfo, Window} import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.connector.catalog._ @@ -50,6 +50,78 @@ import org.apache.spark.sql.types._ */ private[sql] object QueryCompilationErrors extends QueryErrorsBase { + def unexpectedRequiredParameterInFunctionSignature( + functionName: String, functionSignature: FunctionSignature) : Throwable = { + val errorMessage = s"Function $functionName has an unexpected required argument for" + + s" the provided function signature $functionSignature. All required arguments should" + + " come before optional arguments." 
+ SparkException.internalError(errorMessage) + } + + def namedArgumentsNotSupported(functionName: String) : Throwable = { + new AnalysisException( + errorClass = "NAMED_PARAMETERS_NOT_SUPPORTED", + messageParameters = Map("functionName" -> toSQLId(functionName)) + ) + } + + def positionalAndNamedArgumentDoubleReference( + functionName: String, parameterName: String) : Throwable = { + val errorClass = + "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.BOTH_POSITIONAL_AND_NAMED" + new AnalysisException( + errorClass = errorClass, + messageParameters = Map( + "functionName" -> toSQLId(functionName), + "parameterName" -> toSQLId(parameterName)) + ) + } + + def doubleNamedArgumentReference( + functionName: String, parameterName: String): Throwable = { + val errorClass = + "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE" + new AnalysisException( + errorClass = errorClass, + messageParameters = Map( + "functionName" -> toSQLId(functionName), + "parameterName" -> toSQLId(parameterName)) + ) + } + + def requiredParameterNotFound( + functionName: String, parameterName: String) : Throwable = { + new AnalysisException( + errorClass = "REQUIRED_PARAMETER_NOT_FOUND", + messageParameters = Map( + "functionName" -> toSQLId(functionName), + "parameterName" -> toSQLId(parameterName)) + ) + } + + def unrecognizedParameterName( + functionName: String, argumentName: String, candidates: Seq[String]): Throwable = { + import org.apache.spark.sql.catalyst.util.StringUtils.orderSuggestedIdentifiersBySimilarity + + val inputs = candidates.map(candidate => Seq(candidate)).toSeq + val recommendations = orderSuggestedIdentifiersBySimilarity(argumentName, inputs) + .take(3) + new AnalysisException( + errorClass = "UNRECOGNIZED_PARAMETER_NAME", + messageParameters = Map( + "functionName" -> toSQLId(functionName), + "argumentName" -> toSQLId(argumentName), + "proposal" -> recommendations.mkString(" ")) + ) + } + + def unexpectedPositionalArgument(functionName: String): Throwable = { 
+ new AnalysisException( + errorClass = "UNEXPECTED_POSITIONAL_ARGUMENT", + messageParameters = Map("functionName" -> toSQLId(functionName)) + ) + } + def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { new AnalysisException( errorClass = "GROUPING_ID_COLUMN_MISMATCH", @@ -195,7 +267,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def namedArgumentsNotEnabledError(functionName: String, argumentName: String): Throwable = { new AnalysisException( - errorClass = "NAMED_ARGUMENTS_SUPPORT_DISABLED", + errorClass = "NAMED_PARAMETER_SUPPORT_DISABLED", messageParameters = Map( "functionName" -> toSQLId(functionName), "argument" -> toSQLId(argumentName)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/NamedParameterFunctionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/NamedParameterFunctionSuite.scala new file mode 100644 index 00000000000..dd5cb5e7d03 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/NamedParameterFunctionSuite.scala @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, NamedArgumentExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.plans.logical.{FunctionBuilderBase, FunctionSignature, InputParameter, NamedParametersSupport} +import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId +import org.apache.spark.sql.types.DataType + + +case class DummyExpression( + k1: Expression, + k2: Expression, + k3: Expression, + k4: Expression) extends Expression { + override def nullable: Boolean = false + override def eval(input: InternalRow): Any = None + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = null + override def dataType: DataType = null + override def children: Seq[Expression] = Nil + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = null +} + +object DummyExpressionBuilder extends ExpressionBuilder { + def defaultFunctionSignature: FunctionSignature = { + FunctionSignature(Seq(InputParameter("k1"), + InputParameter("k2"), + InputParameter("k3"), + InputParameter("k4"))) + } + + override def functionSignature: Option[FunctionSignature] = + Some(defaultFunctionSignature) + override def build(funcName: String, expressions: Seq[Expression]): Expression = + DummyExpression(expressions(0), expressions(1), expressions(2), expressions(3)) +} + +class NamedArgumentFunctionSuite extends AnalysisTest { + + final val k1Arg = Literal("v1") + final val k2Arg = NamedArgumentExpression("k2", Literal("v2")) + final val k3Arg = NamedArgumentExpression("k3", Literal("v3")) + final val k4Arg = NamedArgumentExpression("k4", Literal("v4")) + final val namedK1Arg = NamedArgumentExpression("k1", Literal("v1-2")) + final val args = Seq(k1Arg, k4Arg, k2Arg, k3Arg) + final val 
expectedSeq = Seq(Literal("v1"), Literal("v2"), Literal("v3"), Literal("v4")) + final val signature = DummyExpressionBuilder.defaultFunctionSignature + final val illegalSignature = FunctionSignature(Seq( + InputParameter("k1"), InputParameter("k2", Option(Literal("v2"))), InputParameter("k3"))) + + test("Check rearrangement of expressions") { + val rearrangedArgs = NamedParametersSupport.defaultRearrange( + signature, args, "function") + for ((returnedArg, expectedArg) <- rearrangedArgs.zip(expectedSeq)) { + assert(returnedArg == expectedArg) + } + val rearrangedArgsWithBuilder = + FunctionRegistry.rearrangeExpressions("function", DummyExpressionBuilder, args) + for ((returnedArg, expectedArg) <- rearrangedArgsWithBuilder.zip(expectedSeq)) { + assert(returnedArg == expectedArg) + } + } + + private def parseRearrangeException(functionSignature: FunctionSignature, + expressions: Seq[Expression], + functionName: String = "function"): SparkThrowable = { + intercept[SparkThrowable]( + NamedParametersSupport.defaultRearrange(functionSignature, expressions, functionName)) + } + + private def parseExternalException[T <: FunctionBuilderBase[_]]( + functionName: String, + builder: T, + expressions: Seq[Expression]) : SparkThrowable = { + intercept[SparkThrowable]( + FunctionRegistry.rearrangeExpressions[T](functionName, builder, expressions)) + } + + test("DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT") { + val errorClass = + "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.BOTH_POSITIONAL_AND_NAMED" + checkError( + exception = parseRearrangeException( + signature, Seq(k1Arg, k2Arg, k3Arg, k4Arg, namedK1Arg), "foo"), + errorClass = errorClass, + parameters = Map("functionName" -> toSQLId("foo"), "parameterName" -> toSQLId("k1")) + ) + checkError( + exception = parseRearrangeException( + signature, Seq(k1Arg, k2Arg, k3Arg, k4Arg, k4Arg), "foo"), + errorClass = "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE", + parameters = Map("functionName" -> toSQLId("foo"), 
"parameterName" -> toSQLId("k4")) + ) + } + + test("REQUIRED_PARAMETER_NOT_FOUND") { + checkError( + exception = parseRearrangeException(signature, Seq(k1Arg, k2Arg, k3Arg), "foo"), + errorClass = "REQUIRED_PARAMETER_NOT_FOUND", + parameters = Map("functionName" -> toSQLId("foo"), "parameterName" -> toSQLId("k4")) + ) + } + + test("UNRECOGNIZED_PARAMETER_NAME") { + checkError( + exception = parseRearrangeException(signature, + Seq(k1Arg, k2Arg, k3Arg, k4Arg, NamedArgumentExpression("k5", Literal("k5"))), "foo"), + errorClass = "UNRECOGNIZED_PARAMETER_NAME", + parameters = Map("functionName" -> toSQLId("foo"), "argumentName" -> toSQLId("k5"), + "proposal" -> (toSQLId("k1") + " " + toSQLId("k2") + " " + toSQLId("k3"))) + ) + } + + test("UNEXPECTED_POSITIONAL_ARGUMENT") { + checkError( + exception = parseRearrangeException(signature, + Seq(k2Arg, k3Arg, k1Arg, k4Arg), "foo"), + errorClass = "UNEXPECTED_POSITIONAL_ARGUMENT", + parameters = Map("functionName" -> toSQLId("foo")) + ) + } + + test("INTERNAL_ERROR: Enforce optional arguments after required arguments") { + val errorMessage = s"Function foo has an unexpected required argument for the provided" + + s" function signature ${illegalSignature}. All required arguments should come before" + + s" optional arguments." 
+ checkError( + exception = parseRearrangeException(illegalSignature, args, "foo"), + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> errorMessage) + ) + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out index faa05535cb3..e01e0ca5ee0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out @@ -2,111 +2,368 @@ -- !query SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd') -- !query analysis +Project [mask(AbCD123-@$#, Q, q, d, o) AS mask(AbCD123-@$#, Q, q, d, o)#x] ++- OneRowRelation + + +-- !query +SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#') +-- !query analysis +Project [mask(AbCD123-@$#, Q, q, d, o) AS mask(AbCD123-@$#, Q, q, d, o)#x] ++- OneRowRelation + + +-- !query +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd') +-- !query analysis +Project [mask(AbCD123-@$#, Q, q, d, null) AS mask(AbCD123-@$#, Q, q, d, NULL)#x] ++- OneRowRelation + + +-- !query +SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#') +-- !query analysis +Project [mask(AbCD123-@$#, Q, q, d, null) AS mask(AbCD123-@$#, Q, q, d, NULL)#x] ++- OneRowRelation + + +-- !query +create temporary view t2 as select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), 
+ ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) +-- !query analysis +CreateViewCommand `t2`, select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 
12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + +- SubqueryAlias t2 + +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + + +-- !query +SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 0.5d)) FROM t2 +-- !query analysis +Aggregate [hex(count_min_sketch(t2d#xL, 0.5, 0.5, 1, 0, 0)) AS hex(count_min_sketch(t2d, 0.5, 0.5, 1))#x] ++- SubqueryAlias t2 + +- View (`t2`, [t2a#x,t2b#x,t2c#x,t2d#xL,t2e#x,t2f#x,t2g#x,t2h#x,t2i#x]) + +- Project [cast(t2a#x as string) AS t2a#x, cast(t2b#x as smallint) AS t2b#x, cast(t2c#x as int) AS t2c#x, cast(t2d#xL as bigint) AS t2d#xL, cast(t2e#x as float) AS t2e#x, cast(t2f#x as double) AS t2f#x, cast(t2g#x as double) AS t2g#x, cast(t2h#x as timestamp) AS t2h#x, cast(t2i#x as date) AS t2i#x] + +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + +- SubqueryAlias t2 + +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + + +-- !query +SELECT hex(count_min_sketch(seed => 1, epsilon => 0.5d, confidence => 0.5d, column => t2d)) FROM t2 +-- !query analysis +Aggregate [hex(count_min_sketch(t2d#xL, 0.5, 0.5, 1, 0, 0)) AS hex(count_min_sketch(t2d, 0.5, 0.5, 1))#x] ++- SubqueryAlias t2 + +- View (`t2`, [t2a#x,t2b#x,t2c#x,t2d#xL,t2e#x,t2f#x,t2g#x,t2h#x,t2i#x]) + +- Project [cast(t2a#x as string) AS t2a#x, cast(t2b#x as smallint) AS t2b#x, cast(t2c#x as int) AS t2c#x, cast(t2d#xL as bigint) AS t2d#xL, cast(t2e#x as float) AS t2e#x, 
cast(t2f#x as double) AS t2f#x, cast(t2g#x as double) AS t2g#x, cast(t2h#x as timestamp) AS t2h#x, cast(t2i#x as date) AS t2i#x] + +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + +- SubqueryAlias t2 + +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + + +-- !query +SELECT hex(count_min_sketch(t2d, 0.5d, seed => 1, confidence => 0.5d)) FROM t2 +-- !query analysis +Aggregate [hex(count_min_sketch(t2d#xL, 0.5, 0.5, 1, 0, 0)) AS hex(count_min_sketch(t2d, 0.5, 0.5, 1))#x] ++- SubqueryAlias t2 + +- View (`t2`, [t2a#x,t2b#x,t2c#x,t2d#xL,t2e#x,t2f#x,t2g#x,t2h#x,t2i#x]) + +- Project [cast(t2a#x as string) AS t2a#x, cast(t2b#x as smallint) AS t2b#x, cast(t2c#x as int) AS t2c#x, cast(t2d#xL as bigint) AS t2d#xL, cast(t2e#x as float) AS t2e#x, cast(t2f#x as double) AS t2f#x, cast(t2g#x as double) AS t2g#x, cast(t2h#x as timestamp) AS t2h#x, cast(t2i#x as date) AS t2i#x] + +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + +- SubqueryAlias t2 + +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] + + +-- !query +SELECT * FROM explode(collection => array(1, 2)) +-- !query analysis +Project [col#x] ++- Generate explode(array(1, 2)), false, [col#x] + +- OneRowRelation + + +-- !query +SELECT * FROM explode_outer(collection => map('a', 1, 'b', 2)) +-- !query analysis +Project [key#x, value#x] ++- Generate explode(map(a, 1, b, 2)), true, [key#x, value#x] + +- OneRowRelation + + +-- !query +SELECT * FROM explode(array(1, 2)), explode(array(3, 4)) +-- !query analysis +Project [col#x, col#x] ++- Join Inner + :- Generate explode(array(1, 2)), false, [col#x] + : +- OneRowRelation + +- Generate explode(array(3, 4)), false, [col#x] + +- OneRowRelation + + +-- !query +SELECT * FROM explode(array(1, 2)) AS t, LATERAL explode(array(3 * t.col, 4 * t.col)) +-- !query analysis +Project [col#x, col#x] ++- LateralJoin lateral-subquery#x [col#x && col#x], Inner + : +- Generate 
explode(array((3 * outer(col#x)), (4 * outer(col#x)))), false, [col#x] + : +- OneRowRelation + +- SubqueryAlias t + +- Generate explode(array(1, 2)), false, [col#x] + +- OneRowRelation + + +-- !query +SELECT num, val, 'Spark' FROM explode(map(1, 'a', 2, 'b')) AS t(num, val) +-- !query analysis +Project [num#x, val#x, Spark AS Spark#x] ++- SubqueryAlias t + +- Project [key#x AS num#x, value#x AS val#x] + +- Generate explode(map(1, a, 2, b)), false, [key#x, value#x] + +- OneRowRelation + + +-- !query +SELECT * FROM explode(collection => explode(array(1))) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS", + "sqlState" : "0A000", + "messageParameters" : { + "expression" : "\"explode(explode(array(1)))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 54, + "fragment" : "explode(collection => explode(array(1)))" + } ] +} + + +-- !query +SELECT * FROM explode(collection => explode(collection => array(1))) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS", + "sqlState" : "0A000", + "messageParameters" : { + "expression" : "\"explode(explode(array(1)))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 68, + "fragment" : "explode(collection => explode(collection => array(1)))" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(0, 8) +-- !query analysis +CreateViewCommand `v`, SELECT id FROM range(0, 8), false, true, LocalTempView, true + +- Project [id#xL] + +- Range (0, 8, step=1, splits=None) + + +-- !query +SELECT * FROM explode(collection => TABLE(v)) +-- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : 
{ - "inputExpr" : "\"namedargumentexpression(q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d))\"" + "inputSql" : "\"outer(__auto_generated_subquery_name_0.c)\"", + "inputType" : "\"STRUCT<id: BIGINT>\"", + "paramIndex" : "1", + "requiredType" : "(\"ARRAY\" or \"MAP\")", + "sqlExpr" : "\"explode(outer(__auto_generated_subquery_name_0.c))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 45, + "fragment" : "explode(collection => TABLE(v))" + } ] +} + + +-- !query +SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNEXPECTED_POSITIONAL_ARGUMENT", + "sqlState" : "4274K", + "messageParameters" : { + "functionName" : "`mask`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, "stopIndex" : 98, - "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" + "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" } ] } -- !query -SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#') +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', digitChar => 'e') -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : "42K09", + "errorClass" : "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(Q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(namedargumentexpression(q), 
namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d), namedargumentexpression(AbCD123-@$#))\"" + "functionName" : "`mask`", + "parameterName" : "`digitChar`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 105, - "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#')" + "stopIndex" : 116, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', digitChar => 'e')" } ] } -- !query -SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd') +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbC') -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : "42K09", + "errorClass" : "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.BOTH_POSITIONAL_AND_NAMED", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(d), NULL)\"" + "functionName" : "`mask`", + "parameterName" : "`str`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 80, - "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd')" + "stopIndex" : 112, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbC')" } ] } -- !query -SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#') +SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd') -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : "42K09", 
+ "errorClass" : "REQUIRED_PARAMETER_NOT_FOUND", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(Q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(d), namedargumentexpression(AbCD123-@$#), NULL)\"" + "functionName" : "`mask`", + "parameterName" : "`str`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 87, - "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#')" + "stopIndex" : 83, + "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" } ] } -- !query -SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd') +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', cellular => 'automata') -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.INPUT_SIZE_NOT_ONE", - "sqlState" : "42K09", + "errorClass" : "UNRECOGNIZED_PARAMETER_NAME", + "sqlState" : "4274K", "messageParameters" : { - "exprName" : "upperChar", - "sqlExpr" : "\"mask(namedargumentexpression(q), AbCD123-@$#, namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d))\"" + "argumentName" : "`cellular`", + "functionName" : "`mask`", + "proposal" : "`str` `upperChar` `otherChar`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 98, - "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" + "stopIndex" : 122, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', cellular => 'automata')" + } ] +} + + +-- !query +SELECT encode(str => 'a', charset => 'utf-8') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + 
"errorClass" : "NAMED_PARAMETERS_NOT_SUPPORTED", + "sqlState" : "4274K", + "messageParameters" : { + "functionName" : "`encode`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 45, + "fragment" : "encode(str => 'a', charset => 'utf-8')" + } ] +} + + +-- !query +SELECT mask('AbCD123-@$#', 'Q', 'q', 'd', 'o', 'k') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "6", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[1, 2, 3, 4, 5]", + "functionName" : "`mask`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 51, + "fragment" : "mask('AbCD123-@$#', 'Q', 'q', 'd', 'o', 'k')" } ] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql b/sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql index aeb7b1e85cd..99f33d78152 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/named-function-arguments.sql @@ -1,5 +1,60 @@ +-- Test for named arguments for Mask SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd'); SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#'); SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd'); SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#'); + +-- Test for named arguments for CountMinSketchAgg +create temporary view t2 as select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 
25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); + +SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 0.5d)) FROM t2; +SELECT hex(count_min_sketch(seed => 1, epsilon => 0.5d, confidence => 0.5d, column => t2d)) FROM t2; +SELECT hex(count_min_sketch(t2d, 0.5d, seed => 1, confidence => 0.5d)) FROM t2; + +-- Test for tabled value functions explode and explode_outer +SELECT * FROM explode(collection => array(1, 2)); +SELECT * FROM explode_outer(collection => map('a', 1, 'b', 2)); +SELECT * FROM explode(array(1, 2)), explode(array(3, 4)); +SELECT * FROM explode(array(1, 2)) AS t, LATERAL explode(array(3 * t.col, 4 * t.col)); +SELECT num, val, 'Spark' FROM explode(map(1, 'a', 2, 'b')) AS t(num, val); + +-- Test for wrapped EXPLODE call to check error preservation +SELECT * FROM explode(collection => explode(array(1))); +SELECT * FROM explode(collection => 
explode(collection => array(1))); + +-- Test with TABLE parser rule +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(0, 8); +SELECT * FROM explode(collection => TABLE(v)); + +-- Unexpected positional argument SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd'); + +-- Duplicate parameter assignment +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', digitChar => 'e'); +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbC'); + +-- Required parameter not found +SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd'); + +-- Unrecognized parameter name +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', cellular => 'automata'); + +-- Named arguments not supported +SELECT encode(str => 'a', charset => 'utf-8'); + +-- Wrong number of arguments +SELECT mask('AbCD123-@$#', 'Q', 'q', 'd', 'o', 'k'); diff --git a/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out b/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out index 842374542ec..3b223cc0e15 100644 --- a/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out @@ -2,121 +2,365 @@ -- !query SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd') -- !query schema +struct<mask(AbCD123-@$#, Q, q, d, o):string> +-- !query output +QqQQdddoooo + + +-- !query +SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#') +-- !query schema +struct<mask(AbCD123-@$#, Q, q, d, o):string> +-- !query output +QqQQdddoooo + + +-- !query +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd') +-- !query schema 
+struct<mask(AbCD123-@$#, Q, q, d, NULL):string> +-- !query output +QqQQddd-@$# + + +-- !query +SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#') +-- !query schema +struct<mask(AbCD123-@$#, Q, q, d, NULL):string> +-- !query output +QqQQddd-@$# + + +-- !query +create temporary view t2 as select * from values + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT hex(count_min_sketch(t2d, seed => 1, epsilon => 0.5d, confidence => 0.5d)) FROM t2 +-- !query schema +struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string> +-- !query output 
+00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003 + + +-- !query +SELECT hex(count_min_sketch(seed => 1, epsilon => 0.5d, confidence => 0.5d, column => t2d)) FROM t2 +-- !query schema +struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string> +-- !query output +00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003 + + +-- !query +SELECT hex(count_min_sketch(t2d, 0.5d, seed => 1, confidence => 0.5d)) FROM t2 +-- !query schema +struct<hex(count_min_sketch(t2d, 0.5, 0.5, 1)):string> +-- !query output +00000001000000000000000D0000000100000004000000005D8D6AB90000000000000002000000000000000700000000000000010000000000000003 + + +-- !query +SELECT * FROM explode(collection => array(1, 2)) +-- !query schema +struct<col:int> +-- !query output +1 +2 + + +-- !query +SELECT * FROM explode_outer(collection => map('a', 1, 'b', 2)) +-- !query schema +struct<key:string,value:int> +-- !query output +a 1 +b 2 + + +-- !query +SELECT * FROM explode(array(1, 2)), explode(array(3, 4)) +-- !query schema +struct<col:int,col:int> +-- !query output +1 3 +1 4 +2 3 +2 4 + + +-- !query +SELECT * FROM explode(array(1, 2)) AS t, LATERAL explode(array(3 * t.col, 4 * t.col)) +-- !query schema +struct<col:int,col:int> +-- !query output +1 3 +1 4 +2 6 +2 8 + + +-- !query +SELECT num, val, 'Spark' FROM explode(map(1, 'a', 2, 'b')) AS t(num, val) +-- !query schema +struct<num:int,val:string,Spark:string> +-- !query output +1 a Spark +2 b Spark + + +-- !query +SELECT * FROM explode(collection => explode(array(1))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS", + "sqlState" : "0A000", + "messageParameters" : { + "expression" : "\"explode(explode(array(1)))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 
54, + "fragment" : "explode(collection => explode(array(1)))" + } ] +} + + +-- !query +SELECT * FROM explode(collection => explode(collection => array(1))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS", + "sqlState" : "0A000", + "messageParameters" : { + "expression" : "\"explode(explode(array(1)))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 68, + "fragment" : "explode(collection => explode(collection => array(1)))" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(0, 8) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM explode(collection => TABLE(v)) +-- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d))\"" + "inputSql" : "\"outer(__auto_generated_subquery_name_0.c)\"", + "inputType" : "\"STRUCT<id: BIGINT>\"", + "paramIndex" : "1", + "requiredType" : "(\"ARRAY\" or \"MAP\")", + "sqlExpr" : "\"explode(outer(__auto_generated_subquery_name_0.c))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 45, + "fragment" : "explode(collection => TABLE(v))" + } ] +} + + +-- !query +SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNEXPECTED_POSITIONAL_ARGUMENT", + "sqlState" : "4274K", + 
"messageParameters" : { + "functionName" : "`mask`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, "stopIndex" : 98, - "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" + "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" } ] } -- !query -SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#') +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', digitChar => 'e') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : "42K09", + "errorClass" : "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(Q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d), namedargumentexpression(AbCD123-@$#))\"" + "functionName" : "`mask`", + "parameterName" : "`digitChar`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 105, - "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbCD123-@$#')" + "stopIndex" : 116, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', digitChar => 'e')" } ] } -- !query -SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd') +SELECT mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbC') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : 
"42K09", + "errorClass" : "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.BOTH_POSITIONAL_AND_NAMED", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(AbCD123-@$#, namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(d), NULL)\"" + "functionName" : "`mask`", + "parameterName" : "`str`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 80, - "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', digitChar => 'd')" + "stopIndex" : 112, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', str => 'AbC')" } ] } -- !query -SELECT mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#') +SELECT mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", - "sqlState" : "42K09", + "errorClass" : "REQUIRED_PARAMETER_NOT_FOUND", + "sqlState" : "4274K", "messageParameters" : { - "inputExpr" : "\"namedargumentexpression(Q)\"", - "inputName" : "upperChar", - "inputType" : "\"STRING\"", - "sqlExpr" : "\"mask(namedargumentexpression(q), namedargumentexpression(Q), namedargumentexpression(d), namedargumentexpression(AbCD123-@$#), NULL)\"" + "functionName" : "`mask`", + "parameterName" : "`str`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 87, - "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', digitChar => 'd', str => 'AbCD123-@$#')" + "stopIndex" : 83, + "fragment" : "mask(lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" } ] } -- !query -SELECT mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd') +SELECT mask('AbCD123-@$#', 
lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', cellular => 'automata') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "DATATYPE_MISMATCH.INPUT_SIZE_NOT_ONE", - "sqlState" : "42K09", + "errorClass" : "UNRECOGNIZED_PARAMETER_NAME", + "sqlState" : "4274K", "messageParameters" : { - "exprName" : "upperChar", - "sqlExpr" : "\"mask(namedargumentexpression(q), AbCD123-@$#, namedargumentexpression(Q), namedargumentexpression(o), namedargumentexpression(d))\"" + "argumentName" : "`cellular`", + "functionName" : "`mask`", + "proposal" : "`str` `upperChar` `otherChar`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 98, - "fragment" : "mask(lowerChar => 'q', 'AbCD123-@$#', upperChar => 'Q', otherChar => 'o', digitChar => 'd')" + "stopIndex" : 122, + "fragment" : "mask('AbCD123-@$#', lowerChar => 'q', upperChar => 'Q', otherChar => 'o', digitChar => 'd', cellular => 'automata')" + } ] +} + + +-- !query +SELECT encode(str => 'a', charset => 'utf-8') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NAMED_PARAMETERS_NOT_SUPPORTED", + "sqlState" : "4274K", + "messageParameters" : { + "functionName" : "`encode`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 45, + "fragment" : "encode(str => 'a', charset => 'utf-8')" + } ] +} + + +-- !query +SELECT mask('AbCD123-@$#', 'Q', 'q', 'd', 'o', 'k') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "6", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[1, 2, 3, 4, 5]", + "functionName" : "`mask`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 51, + "fragment" : 
"mask('AbCD123-@$#', 'Q', 'q', 'd', 'o', 'k')" } ] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 2731760f7ef..7ebb677b121 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -32,16 +32,11 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL intercept[ParseException](sql(sqlText).collect()) } - test("NAMED_ARGUMENTS_SUPPORT_DISABLED: named arguments not turned on") { + test("NAMED_PARAMETER_SUPPORT_DISABLED: named arguments not turned on") { withSQLConf("spark.sql.allowNamedFunctionArguments" -> "false") { - checkError( - exception = parseException("SELECT * FROM encode(value => 'abc', charset => 'utf-8')"), - errorClass = "NAMED_ARGUMENTS_SUPPORT_DISABLED", - parameters = Map("functionName" -> toSQLId("encode"), "argument" -> toSQLId("value")) - ) checkError( exception = parseException("SELECT explode(arr => array(10, 20))"), - errorClass = "NAMED_ARGUMENTS_SUPPORT_DISABLED", + errorClass = "NAMED_PARAMETER_SUPPORT_DISABLED", parameters = Map("functionName"-> toSQLId("explode"), "argument" -> toSQLId("arr")) ) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org