This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fa4096eb6ab [SPARK-46036][SQL] Removing error-class from raise_error function fa4096eb6ab is described below commit fa4096eb6aba4c66f0d9c5dcbabdfc0804064fff Author: Aleksandar Tomic <aleksandar.to...@databricks.com> AuthorDate: Wed Nov 29 12:51:46 2023 +0100 [SPARK-46036][SQL] Removing error-class from raise_error function ### What changes were proposed in this pull request? - Reverting raise_error to a single-param variant that is enforced through the `ExpressionBuilder` interface (vs. reflection-based lookup). - Keeping the internal RaiseError class untouched, with the ability to throw a custom exception class. The only way to call it as of this change is from internal code. An open question is whether we want to clean up this code as well, given that currently no one is using it and this code seems too complex if we don't need to expose RaiseError in the surface layer. Looking for feedback on this one. ### Why are the changes needed? This PR is a follow-up of the [raise_error improvement PR](https://github.com/apache/spark/pull/42985). As described in the jira [ticket](https://issues.apache.org/jira/browse/SPARK-46036), raise_error with a custom error-class parameter was deemed to be too powerful, with the concern that a user might throw internal system errors which shouldn't be exposed to external layers. With this change, from an external perspective, we revert to the old behaviour, where raise_error only accepts a single string argument, which will represent the message of the `USER_RAISED_EXCEPTION` exception object. ### Does this PR introduce _any_ user-facing change? Yes, we introduce a change in the raise_error signature. With this change it is no longer allowed to call: ```sql SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`')) ``` Instead, the only allowed version will be: ```sql SELECT raise_error('custom error message') ``` This will result in `USER_RAISED_EXCEPTION` being thrown with the provided message. 
### How was this patch tested? The change updates existing sql tests under misc-functions.sql and adds some new checks. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44004 from dbatomic/SPARK-46036-raise-error. Authored-by: Aleksandar Tomic <aleksandar.to...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../scala/org/apache/spark/sql/functions.scala | 8 -- .../sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../spark/sql/catalyst/expressions/misc.scala | 43 ++++--- .../scala/org/apache/spark/sql/functions.scala | 8 -- .../analyzer-results/misc-functions.sql.out | 131 +++++++++++++-------- .../resources/sql-tests/inputs/misc-functions.sql | 23 ++-- .../sql-tests/results/misc-functions.sql.out | 123 +++++++++---------- .../sql/expressions/ExpressionInfoSuite.scala | 2 +- 8 files changed, 174 insertions(+), 166 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index ba6fe725ab2..061fca276a3 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -3318,14 +3318,6 @@ object functions { */ def raise_error(c: Column): Column = Column.fn("raise_error", c) - /** - * Throws an exception with the provided error message. - * - * @group misc_funcs - * @since 4.0.0 - */ - def raise_error(c: Column, e: Column): Column = Column.fn("raise_error", c, e) - /** * Returns the estimated number of unique values given the binary representation of a * Datasketches HllSketch. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 4fb8d88f6ea..e72ce91b063 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -722,7 +722,7 @@ object FunctionRegistry { // misc functions expression[AssertTrue]("assert_true"), - expression[RaiseError]("raise_error"), + expressionBuilder("raise_error", RaiseErrorExpressionBuilder), expression[Crc32]("crc32"), expression[Md5]("md5"), expression[Uuid]("uuid"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 60bf5c603d9..1ae8b19ff63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -19,12 +19,13 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, UnresolvedSeed} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.trees.TreePattern.{CURRENT_LIKE, TreePattern} import org.apache.spark.sql.catalyst.util.{MapData, RandomUUIDGenerator} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.errors.QueryExecutionErrors.raiseError import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -61,21 +62,9 
@@ case class PrintToStderr(child: Expression) extends UnaryExpression { /** * Throw with the result of an expression (used for debugging). + * Caller can specify the errorClass to be thrown and parameters to be passed to this error class. + * Default is to throw USER_RAISED_EXCEPTION with provided string literal. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_( expr [, errorParams ]) - Throws a USER_RAISED_EXCEPTION with `expr` as message, or a defined error class in `expr` with a parameter map. A `null` errorParms is equivalent to an empty map.", - examples = """ - Examples: - > SELECT _FUNC_('custom error message'); - [USER_RAISED_EXCEPTION] custom error message - - > SELECT _FUNC_('VIEW_NOT_FOUND', Map('relationName' -> '`V1`')); - [VIEW_NOT_FOUND] The view `V1` cannot be found. ... - """, - since = "3.1.0", - group = "misc_funcs") -// scalastyle:on line.size.limit case class RaiseError(errorClass: Expression, errorParms: Expression, dataType: DataType) extends BinaryExpression with ImplicitCastInputTypes { @@ -139,6 +128,30 @@ object RaiseError { new RaiseError(errorClass, parms) } +/** + * Throw with the result of an expression (used for debugging). + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_( expr ) - Throws a USER_RAISED_EXCEPTION with `expr` as message.", + examples = """ + Examples: + > SELECT _FUNC_('custom error message'); + [USER_RAISED_EXCEPTION] custom error message + """, + since = "3.1.0", + group = "misc_funcs") +// scalastyle:on line.size.limit +object RaiseErrorExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + if (expressions.length != 1) { + throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(1), expressions.length) + } else { + RaiseError(expressions.head) + } + } +} + /** * A function that throws an exception if 'condition' is not true. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index a52412e94ee..d8b5a4b416c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3269,14 +3269,6 @@ object functions { */ def raise_error(c: Column): Column = Column.fn("raise_error", c) - /** - * Throws an exception with the provided error class and parameter map. - * - * @group misc_funcs - * @since 4.0.0 - */ - def raise_error(c: Column, e: Column): Column = Column.fn("raise_error", c, e) - /** * Returns the estimated number of unique values given the binary representation * of a Datasketches HllSketch. diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out index 1fd6e55a4d2..539a3485842 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out @@ -120,58 +120,85 @@ Project [if ((v#x > 5)) cast(raise_error(USER_RAISED_EXCEPTION, map(errorMessage -- !query -SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`')) --- !query analysis -Project [raise_error(VIEW_NOT_FOUND, map(relationName, `v`), NullType) AS raise_error(VIEW_NOT_FOUND, map(relationName, `v`))#x] -+- OneRowRelation - - --- !query -SELECT raise_error('VIEW_NOT_FOund', Map('relationName', '`v`')) --- !query analysis -Project [raise_error(VIEW_NOT_FOund, map(relationName, `v`), NullType) AS raise_error(VIEW_NOT_FOund, map(relationName, `v`))#x] -+- OneRowRelation - - --- !query -SELECT raise_error('VIEW_NOT_FOund', Map('relationNAME', '`v`')) --- !query analysis -Project [raise_error(VIEW_NOT_FOund, map(relationNAME, `v`), NullType) AS raise_error(VIEW_NOT_FOund, map(relationNAME, `v`))#x] -+- OneRowRelation - - --- !query -SELECT 
raise_error('VIEW_NOT_FOUND', Map()) --- !query analysis -Project [raise_error(VIEW_NOT_FOUND, cast(map() as map<string,string>), NullType) AS raise_error(VIEW_NOT_FOUND, map())#x] -+- OneRowRelation - - --- !query -SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`', 'totallymadeup', '5')) --- !query analysis -Project [raise_error(VIEW_NOT_FOUND, map(relationName, `v`, totallymadeup, 5), NullType) AS raise_error(VIEW_NOT_FOUND, map(relationName, `v`, totallymadeup, 5))#x] -+- OneRowRelation - - --- !query -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', Map()) --- !query analysis -Project [raise_error(ALL_PARTITION_COLUMNS_NOT_ALLOWED, cast(map() as map<string,string>), NullType) AS raise_error(ALL_PARTITION_COLUMNS_NOT_ALLOWED, map())#x] -+- OneRowRelation - - --- !query -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', NULL) --- !query analysis -Project [raise_error(ALL_PARTITION_COLUMNS_NOT_ALLOWED, cast(null as map<string,string>), NullType) AS raise_error(ALL_PARTITION_COLUMNS_NOT_ALLOWED, NULL)#x] -+- OneRowRelation - - --- !query -SELECT raise_error(NULL, NULL) --- !query analysis -Project [raise_error(cast(null as string), cast(null as map<string,string>), NullType) AS raise_error(NULL, NULL)#x] +SELECT raise_error('error message', Map()) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 42, + "fragment" : "raise_error('error message', Map())" + } ] +} + + +-- !query +SELECT raise_error('error message', 'some args') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + 
"actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 48, + "fragment" : "raise_error('error message', 'some args')" + } ] +} + + +-- !query +SELECT raise_error() +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "0", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "raise_error()" + } ] +} + + +-- !query +SELECT raise_error(NULL) +-- !query analysis +Project [raise_error(USER_RAISED_EXCEPTION, cast(map(errorMessage, null) as map<string,string>), NullType) AS raise_error(USER_RAISED_EXCEPTION, map(errorMessage, NULL))#x] ++- OneRowRelation + + +-- !query +SELECT raise_error(1) +-- !query analysis +Project [raise_error(USER_RAISED_EXCEPTION, cast(map(errorMessage, 1) as map<string,string>), NullType) AS raise_error(USER_RAISED_EXCEPTION, map(errorMessage, 1))#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql index 12e6a36db77..f7c58a0dc5a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -21,21 +21,20 @@ CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); SELECT raise_error('error message'); SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; -SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`')); --- Error class is case insensitive -SELECT raise_error('VIEW_NOT_FOund', Map('relationName', '`v`')); --- 
parameters are case sensitive -SELECT raise_error('VIEW_NOT_FOund', Map('relationNAME', '`v`')); --- Too few parameters -SELECT raise_error('VIEW_NOT_FOUND', Map()); -- Too many parameters -SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`', 'totallymadeup', '5')); +SELECT raise_error('error message', Map()); + +-- Too many parameters +SELECT raise_error('error message', 'some args'); + +-- Too few parameters +SELECT raise_error(); --- Empty parameter list -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', Map()); -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', NULL); +-- Passing null as message +SELECT raise_error(NULL); -SELECT raise_error(NULL, NULL); +-- Passing non-string type +SELECT raise_error(1); -- Check legacy config disables printing of [USER_RAISED_EXCEPTION] SET spark.sql.legacy.raiseErrorWithoutErrorClass=true; diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index c9bdd7d2640..7316c3136ce 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -177,121 +177,106 @@ org.apache.spark.SparkRuntimeException -- !query -SELECT raise_error('VIEW_NOT_FOUND', Map('relationName', '`v`')) +SELECT raise_error('error message', Map()) -- !query schema struct<> -- !query output -org.apache.spark.SparkRuntimeException -{ - "errorClass" : "VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`v`" - } -} - - --- !query -SELECT raise_error('VIEW_NOT_FOund', Map('relationName', '`v`')) --- !query schema -struct<> --- !query output -org.apache.spark.SparkRuntimeException +org.apache.spark.sql.AnalysisException { - "errorClass" : "VIEW_NOT_FOUND", - "sqlState" : "42P01", + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", "messageParameters" : { - "relationName" : "`v`" - } + 
"actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 42, + "fragment" : "raise_error('error message', Map())" + } ] } -- !query -SELECT raise_error('VIEW_NOT_FOund', Map('relationNAME', '`v`')) +SELECT raise_error('error message', 'some args') -- !query schema struct<> -- !query output -org.apache.spark.SparkRuntimeException +org.apache.spark.sql.AnalysisException { - "errorClass" : "USER_RAISED_EXCEPTION_PARAMETER_MISMATCH", - "sqlState" : "P0001", + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", "messageParameters" : { - "errorClass" : "'VIEW_NOT_FOUND'", - "expectedParms" : "'relationName'", - "providedParms" : "'relationNAME'" - } + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 48, + "fragment" : "raise_error('error message', 'some args')" + } ] } -- !query -SELECT raise_error('VIEW_NOT_FOUND', Map()) +SELECT raise_error() -- !query schema struct<> -- !query output -org.apache.spark.SparkRuntimeException +org.apache.spark.sql.AnalysisException { - "errorClass" : "USER_RAISED_EXCEPTION_PARAMETER_MISMATCH", - "sqlState" : "P0001", + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", "messageParameters" : { - "errorClass" : "'VIEW_NOT_FOUND'", - "expectedParms" : "'relationName'", - "providedParms" : "" - } + "actualNum" : "0", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`raise_error`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "raise_error()" + } ] } -- !query -SELECT raise_error('VIEW_NOT_FOUND', 
Map('relationName', '`v`', 'totallymadeup', '5')) +SELECT raise_error(NULL) -- !query schema struct<> -- !query output org.apache.spark.SparkRuntimeException { - "errorClass" : "USER_RAISED_EXCEPTION_PARAMETER_MISMATCH", + "errorClass" : "USER_RAISED_EXCEPTION", "sqlState" : "P0001", "messageParameters" : { - "errorClass" : "'VIEW_NOT_FOUND'", - "expectedParms" : "'relationName'", - "providedParms" : "'relationName','totallymadeup'" + "errorMessage" : "null" } } -- !query -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', Map()) +SELECT raise_error(1) -- !query schema struct<> -- !query output org.apache.spark.SparkRuntimeException { - "errorClass" : "ALL_PARTITION_COLUMNS_NOT_ALLOWED", - "sqlState" : "KD005" -} - - --- !query -SELECT raise_error('ALL_PARTITION_COLUMNS_NOT_ALLOWED', NULL) --- !query schema -struct<> --- !query output -org.apache.spark.SparkRuntimeException -{ - "errorClass" : "ALL_PARTITION_COLUMNS_NOT_ALLOWED", - "sqlState" : "KD005" -} - - --- !query -SELECT raise_error(NULL, NULL) --- !query schema -struct<> --- !query output -org.apache.spark.SparkRuntimeException -{ - "errorClass" : "USER_RAISED_EXCEPTION_UNKNOWN_ERROR_CLASS", + "errorClass" : "USER_RAISED_EXCEPTION", "sqlState" : "P0001", "messageParameters" : { - "errorClass" : "'null'" + "errorMessage" : "1" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 0f1a45319af..b28a72f7c20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -193,7 +193,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.TryReflect", "org.apache.spark.sql.catalyst.expressions.SparkVersion", // Throws an error - "org.apache.spark.sql.catalyst.expressions.RaiseError", + 
"org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder", "org.apache.spark.sql.catalyst.expressions.AssertTrue", classOf[CurrentUser].getName, // The encrypt expression includes a random initialization vector to its encrypted result --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org