This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 49562f41678 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression 49562f41678 is described below commit 49562f416788cab05b3f82a2471a1f2f6561a1d8 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sat May 21 07:50:59 2022 +0300 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression ### What changes were proposed in this pull request? In the PR, I propose to use the MULTI_VALUE_SUBQUERY_ERROR error class for multiple rows from a subquery used as an expression. ### Why are the changes needed? Porting the execution errors for multiple rows from a subquery used as an expression to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test suite. Closes #36580 from panbingkun/SPARK-39167. Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala | 5 ++++ .../org/apache/spark/sql/execution/subquery.scala | 5 ++-- .../scala/org/apache/spark/sql/SubquerySuite.scala | 11 --------- .../sql/errors/QueryExecutionErrorsSuite.scala | 27 ++++++++++++++++++++++ 5 files changed, 37 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1a139c018e8..f6fba105872 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -160,6 +160,9 @@ "MULTI_UDF_INTERFACE_ERROR" : { "message" : [ "Not allowed to implement multiple UDF interfaces, UDF class <class>" ] }, + "MULTI_VALUE_SUBQUERY_ERROR" : { + "message" : [ "more than one row returned by a subquery used as an expression: <plan>" ] + }, 
"NON_LITERAL_PIVOT_VALUES" : { "message" : [ "Literal expressions required for pivot values, found '<expression>'" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1e664100545..f79b30f0d0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2005,4 +2005,9 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), cause = null) } + + def multipleRowSubqueryError(plan: String): Throwable = { + new SparkException( + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 209b0f79243..c6f5983f243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType} @@ -79,9 +80,7 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - throw new IllegalStateException( - s"more than one row returned by a subquery used as an 
expression:\n$plan") + throw QueryExecutionErrors.multipleRowSubqueryError(plan.toString) } if (rows.length == 1) { assert(rows(0).numFields == 1, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 396fca47634..500913fb289 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import scala.collection.mutable.ArrayBuffer -import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} @@ -146,16 +145,6 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("runtime error when the number of rows is greater than 1") { - val e = intercept[SparkException] { - sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() - } - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains( - "more than one row returned by a subquery used as an expression")) - } - test("uncorrelated scalar subquery on a DataFrame generated query") { withTempView("subqueryData") { val df = Seq((1, "one"), (2, "two"), (3, "three")).toDF("key", "value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index bbf6c0dda79..e8d1afddde2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -611,6 +611,33 @@ class QueryExecutionErrorsSuite matchMsg = true) } } + + test("MULTI_VALUE_SUBQUERY_ERROR: " + + "more than one row returned by a subquery used as an expression") { + checkErrorClass( + exception = intercept[SparkException] { + sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() + }, + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", + msg = + """more than one row returned by a subquery used as an expression: """ + + """Subquery subquery#\w+, \[id=#\w+\] + |\+\- AdaptiveSparkPlan isFinalPlan=true + | \+\- == Final Plan == + | Union + | :\- \*\(1\) Project \[\w+ AS a#\w+\] + | : \+\- \*\(1\) Scan OneRowRelation\[\] + | \+\- \*\(2\) Project \[\w+ AS a#\w+\] + | \+\- \*\(2\) Scan OneRowRelation\[\] + | \+\- == Initial Plan == + | Union + | :\- Project \[\w+ AS a#\w+\] + | : \+\- Scan OneRowRelation\[\] + | \+\- Project \[\w+ AS a#\w+\] + | \+\- Scan OneRowRelation\[\] + |""".stripMargin, + matchMsg = true) + } } class FakeFileSystemSetPermission extends LocalFileSystem { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org