[spark] branch master updated: [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes

maxgekk Tue, 18 Oct 2022 06:19:37 -0700

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 11bc5ce19ef [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes
11bc5ce19ef is described below

commit 11bc5ce19ef5db9e4c9c6925a31a85070a0c0495
Author: lvshaokang <lvshaoka...@gmail.com>
AuthorDate: Tue Oct 18 18:19:12 2022 +0500

    [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes
    
    ### What changes were proposed in this pull request?
    
    In this PR, I propose to use error classes for type check failures in Bloom Filter expressions.
    
    ### Why are the changes needed?
    
    Migration onto error classes unifies Spark SQL error messages.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. The PR changes user-facing error messages.
    
    ### How was this patch tested?
    
    ```
    build/sbt "sql/testOnly *SQLQueryTestSuite"
    build/sbt "test:testOnly org.apache.spark.SparkThrowableSuite"
    build/sbt "test:testOnly *BloomFilterAggregateQuerySuite"
    ```
    
    Closes #38251 from lvshaokang/SPARK-40368.
    
    Authored-by: lvshaokang <lvshaoka...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 10 +++
 .../expressions/BloomFilterMightContain.scala      | 25 ++++++--
 .../spark/sql/BloomFilterAggregateQuerySuite.scala | 72 +++++++++++++++++++---
 3 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index 1474d800f72..3ffbedff4c9 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -101,6 +101,16 @@
           "the binary operator requires the input type <inputType>, not 
<actualDataType>."
         ]
       },
+      "BLOOM_FILTER_BINARY_OP_WRONG_TYPE" : {
+        "message" : [
+          "The Bloom filter binary input to <functionName> should be either a 
constant value or a scalar subquery expression, but it's <actual>."
+        ]
+      },
+      "BLOOM_FILTER_WRONG_TYPE" : {
+        "message" : [
+          "Input to function <functionName> should have been <expectedLeft> 
followed by a value with <expectedRight>, but it's [<actualLeft>, 
<actualRight>]."
+        ]
+      },
       "CANNOT_CONVERT_TO_JSON" : {
         "message" : [
           "Unable to convert column <name> of type <type> to JSON."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala
index 24d1dd69d9d..5cb19d36b80 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala
@@ -21,6 +21,8 @@ import java.io.ByteArrayInputStream
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
+import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLId, 
toSQLType}
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode, JavaCode, TrueLiteral}
 import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper
 import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE
@@ -59,12 +61,25 @@ case class BloomFilterMightContain(
             if !subquery.containsPattern(OUTER_REFERENCE) =>
             TypeCheckResult.TypeCheckSuccess
           case _ =>
-            TypeCheckResult.TypeCheckFailure(s"The Bloom filter binary input 
to $prettyName " +
-              "should be either a constant value or a scalar subquery 
expression")
+            DataTypeMismatch(
+              errorSubClass = "BLOOM_FILTER_BINARY_OP_WRONG_TYPE",
+              messageParameters = Map(
+                "functionName" -> toSQLId(prettyName),
+                "actual" -> toSQLExpr(bloomFilterExpression)
+              )
+            )
         }
-      case _ => TypeCheckResult.TypeCheckFailure(s"Input to function 
$prettyName should have " +
-        s"been ${BinaryType.simpleString} followed by a value with 
${LongType.simpleString}, " +
-        s"but it's [${left.dataType.catalogString}, 
${right.dataType.catalogString}].")
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "BLOOM_FILTER_WRONG_TYPE",
+          messageParameters = Map(
+            "functionName" -> toSQLId(prettyName),
+            "expectedLeft" -> toSQLType(BinaryType),
+            "expectedRight" -> toSQLType(LongType),
+            "actualLeft" -> toSQLType(left.dataType),
+            "actualRight" -> toSQLType(right.dataType)
+          )
+        )
     }
   }
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala
index 05513cddccb..6a22414db00 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala
@@ -152,15 +152,45 @@ class BloomFilterAggregateQuerySuite extends QueryTest 
with SharedSparkSession {
       spark.sql("""|SELECT might_contain(1.0, 1L)"""
         .stripMargin)
     }
-    assert(exception1.getMessage.contains(
-      "Input to function might_contain should have been binary followed by a 
value with bigint"))
+    checkError(
+      exception = exception1,
+      errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_WRONG_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"might_contain(1.0, 1)\"",
+        "functionName" -> "`might_contain`",
+        "expectedLeft" -> "\"BINARY\"",
+        "expectedRight" -> "\"BIGINT\"",
+        "actualLeft" -> "\"DECIMAL(2,1)\"",
+        "actualRight" -> "\"BIGINT\""
+      ),
+      context = ExpectedContext(
+        fragment = "might_contain(1.0, 1L)",
+        start = 7,
+        stop = 28
+      )
+    )
 
     val exception2 = intercept[AnalysisException] {
       spark.sql("""|SELECT might_contain(NULL, 0.1)"""
         .stripMargin)
     }
-    assert(exception2.getMessage.contains(
-      "Input to function might_contain should have been binary followed by a 
value with bigint"))
+    checkError(
+      exception = exception2,
+      errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_WRONG_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"might_contain(NULL, 0.1)\"",
+        "functionName" -> "`might_contain`",
+        "expectedLeft" -> "\"BINARY\"",
+        "expectedRight" -> "\"BIGINT\"",
+        "actualLeft" -> "\"VOID\"",
+        "actualRight" -> "\"DECIMAL(1,1)\""
+      ),
+      context = ExpectedContext(
+        fragment = "might_contain(NULL, 0.1)",
+        start = 7,
+        stop = 30
+      )
+    )
   }
 
   test("Test that might_contain errors out non-constant Bloom filter") {
@@ -170,9 +200,20 @@ class BloomFilterAggregateQuerySuite extends QueryTest 
with SharedSparkSession {
                   |FROM values (cast(1 as string)), (cast(2 as string)) as 
t(a)"""
         .stripMargin)
     }
-    assert(exception1.getMessage.contains(
-      "The Bloom filter binary input to might_contain should be either a 
constant value or " +
-        "a scalar subquery expression"))
+    checkError(
+      exception = exception1,
+      errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_BINARY_OP_WRONG_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"might_contain(CAST(a AS BINARY), CAST(5 AS BIGINT))\"",
+        "functionName" -> "`might_contain`",
+        "actual" -> "\"CAST(a AS BINARY)\""
+      ),
+      context = ExpectedContext(
+        fragment = "might_contain(cast(a as binary), cast(5 as long))",
+        start = 8,
+        stop = 56
+      )
+    )
 
     val exception2 = intercept[AnalysisException] {
       spark.sql("""
@@ -180,9 +221,20 @@ class BloomFilterAggregateQuerySuite extends QueryTest 
with SharedSparkSession {
                   |FROM values (cast(1 as string)), (cast(2 as string)) as 
t(a)"""
         .stripMargin)
     }
-    assert(exception2.getMessage.contains(
-      "The Bloom filter binary input to might_contain should be either a 
constant value or " +
-        "a scalar subquery expression"))
+    checkError(
+      exception = exception2,
+      errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_BINARY_OP_WRONG_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"might_contain(scalarsubquery(a), CAST(5 AS BIGINT))\"",
+        "functionName" -> "`might_contain`",
+        "actual" -> "\"scalarsubquery(a)\""
+      ),
+      context = ExpectedContext(
+        fragment = "might_contain((select cast(a as binary)), cast(5 as 
long))",
+        start = 8,
+        stop = 65
+      )
+    )
   }
 
   test("Test that might_contain can take a constant value input") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes

Reply via email to