This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 49562f41678 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression 49562f41678 is described below commit 49562f416788cab05b3f82a2471a1f2f6561a1d8 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sat May 21 07:50:59 2022 +0300 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression ### What changes were proposed in this pull request? In the PR, I propose to use the MULTI_VALUE_SUBQUERY_ERROR error class for multiple rows from a subquery used as an expression. ### Why are the changes needed? Porting the execution errors for multiple rows from a subquery used as an expression to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test suite. Closes #36580 from panbingkun/SPARK-39167. Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala | 5 ++++ .../org/apache/spark/sql/execution/subquery.scala | 5 ++-- .../scala/org/apache/spark/sql/SubquerySuite.scala | 11 --------- .../sql/errors/QueryExecutionErrorsSuite.scala | 27 ++++++++++++++++++++++ 5 files changed, 37 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1a139c018e8..f6fba105872 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -160,6 +160,9 @@ "MULTI_UDF_INTERFACE_ERROR" : { "message" : [ "Not allowed to implement multiple UDF interfaces, UDF class <class>" ] }, + "MULTI_VALUE_SUBQUERY_ERROR" : { + "message" : [ "more than one row returned by a subquery used as an expression: <plan>" ] + }, 
"NON_LITERAL_PIVOT_VALUES" : { "message" : [ "Literal expressions required for pivot values, found '<expression>'" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1e664100545..f79b30f0d0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2005,4 +2005,9 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), cause = null) } + + def multipleRowSubqueryError(plan: String): Throwable = { + new SparkException( + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 209b0f79243..c6f5983f243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType} @@ -79,9 +80,7 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - throw new IllegalStateException( - s"more than one row returned by a subquery used as an 
expression:\n$plan") + throw QueryExecutionErrors.multipleRowSubqueryError(plan.toString) } if (rows.length == 1) { assert(rows(0).numFields == 1, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 396fca47634..500913fb289 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import scala.collection.mutable.ArrayBuffer -import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} @@ -146,16 +145,6 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("runtime error when the number of rows is greater than 1") { - val e = intercept[SparkException] { - sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() - } - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains( - "more than one row returned by a subquery used as an expression")) - } - test("uncorrelated scalar subquery on a DataFrame generated query") { withTempView("subqueryData") { val df = Seq((1, "one"), (2, "two"), (3, "three")).toDF("key", "value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index bbf6c0dda79..e8d1afddde2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -611,6 +611,33 @@ class QueryExecutionErrorsSuite matchMsg = true) } } + + test("MULTI_VALUE_SUBQUERY_ERROR: " + + "more than one row returned by a subquery used as an expression") { + checkErrorClass( + exception = intercept[SparkException] { + sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() + }, + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", + msg = + """more than one row returned by a subquery used as an expression: """ + + """Subquery subquery#\w+, \[id=#\w+\] + |\+\- AdaptiveSparkPlan isFinalPlan=true + | \+\- == Final Plan == + | Union + | :\- \*\(1\) Project \[\w+ AS a#\w+\] + | : \+\- \*\(1\) Scan OneRowRelation\[\] + | \+\- \*\(2\) Project \[\w+ AS a#\w+\] + | \+\- \*\(2\) Scan OneRowRelation\[\] + | \+\- == Initial Plan == + | Union + | :\- Project \[\w+ AS a#\w+\] + | : \+\- Scan OneRowRelation\[\] + | \+\- Project \[\w+ AS a#\w+\] + | \+\- Scan OneRowRelation\[\] + |""".stripMargin, + matchMsg = true) + } } class FakeFileSystemSetPermission extends LocalFileSystem { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org