This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d0d4aab4378 [SPARK-44154][SQL][FOLLOWUP] `BitmapCount` and 
`BitmapOrAgg` should use `DataTypeMismatch` to indicate unexpected input data 
type
d0d4aab4378 is described below

commit d0d4aab437843ce5adf5900d2d6088e79323f8d5
Author: Bruce Robbins <bersprock...@gmail.com>
AuthorDate: Wed Jul 26 19:31:27 2023 +0800

    [SPARK-44154][SQL][FOLLOWUP] `BitmapCount` and `BitmapOrAgg` should use 
`DataTypeMismatch` to indicate unexpected input data type
    
    ### What changes were proposed in this pull request?
    
    Change `BitmapCount` and `BitmapOrAgg` to use `DataTypeMismatch` rather 
than `TypeCheckResult.TypeCheckFailure` to indicate incorrect input types.
    
    ### Why are the changes needed?
    
    It appears `TypeCheckResult.TypeCheckFailure` is effectively deprecated: no 
expressions other than the recently added `BitmapCount` and `BitmapOrAgg` use 
it.
    
    ### Does this PR introduce _any_ user-facing change?
    
    This PR changes an error message for two expressions that are not yet in 
any released version of Spark.
    
    Before PR:
    ```
    spark-sql (default)> select bitmap_count(12);
    [DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve 
"bitmap_count(12)" due to data type mismatch: Bitmap must be a BinaryType.; 
line 1 pos 7;
    'Project [unresolvedalias(bitmap_count(12), None)]
    +- OneRowRelation
    
    spark-sql (default)> select bitmap_or_agg(12);
    [DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve 
"bitmap_or_agg(12)" due to data type mismatch: Bitmap must be a BinaryType.; 
line 1 pos 7;
    'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)]
    +- OneRowRelation
    ```
    After PR:
    ```
    spark-sql (default)> select bitmap_count(12);
    [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "bitmap_count(12)" 
due to data type mismatch: Parameter 0 requires the "BINARY" type, however "12" 
has the type "INT".; line 1 pos 7;
    'Project [unresolvedalias(bitmap_count(12), None)]
    +- OneRowRelation
    
    spark-sql (default)> select bitmap_or_agg(12);
    [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve 
"bitmap_or_agg(12)" due to data type mismatch: Parameter 0 requires the 
"BINARY" type, however "12" has the type "INT".; line 1 pos 7;
    'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)]
    +- OneRowRelation
    ```
    ### How was this patch tested?
    
    New unit tests.
    
    Closes #42139 from bersprockets/bitmap_type_check.
    
    Authored-by: Bruce Robbins <bersprock...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../catalyst/expressions/bitmapExpressions.scala   | 26 +++++++++++--
 .../spark/sql/BitmapExpressionsQuerySuite.scala    | 44 ++++++++++++++++++++++
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
index 2adfddb9383..5c7ef5cde5b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala
@@ -19,10 +19,12 @@ package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import 
org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, 
TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.expressions.aggregate.ImperativeAggregate
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.trees.UnaryLike
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
+import org.apache.spark.sql.catalyst.util.TypeUtils._
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, 
LongType, StructType}
 
@@ -111,9 +113,17 @@ case class BitmapCount(child: Expression)
 
   override def checkInputDataTypes(): TypeCheckResult = {
     if (child.dataType != BinaryType) {
-      TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType")
+      DataTypeMismatch(
+        errorSubClass = "UNEXPECTED_INPUT_TYPE",
+        messageParameters = Map(
+          "paramIndex" -> "0",
+          "requiredType" -> toSQLType(BinaryType),
+          "inputSql" -> toSQLExpr(child),
+          "inputType" -> toSQLType(child.dataType)
+        )
+      )
     } else {
-      TypeCheckResult.TypeCheckSuccess
+      TypeCheckSuccess
     }
   }
 
@@ -248,9 +258,17 @@ case class BitmapOrAgg(child: Expression,
 
   override def checkInputDataTypes(): TypeCheckResult = {
     if (child.dataType != BinaryType) {
-      TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType")
+      DataTypeMismatch(
+        errorSubClass = "UNEXPECTED_INPUT_TYPE",
+        messageParameters = Map(
+          "paramIndex" -> "0",
+          "requiredType" -> toSQLType(BinaryType),
+          "inputSql" -> toSQLExpr(child),
+          "inputType" -> toSQLType(child.dataType)
+        )
+      )
     } else {
-      TypeCheckResult.TypeCheckSuccess
+      TypeCheckSuccess
     }
   }
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala
index 6baca72a14e..74c744635ec 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala
@@ -207,4 +207,48 @@ class BitmapExpressionsQuerySuite extends QueryTest with 
SharedSparkSession {
       Seq(Row("700000"))
     )
   }
+
+  test("bitmap_count called with non-binary type") {
+    val df = Seq(12).toDF("a")
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.selectExpr("bitmap_count(a)")
+      },
+      errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"bitmap_count(a)\"",
+        "paramIndex" -> "0",
+        "requiredType" -> "\"BINARY\"",
+        "inputSql" -> "\"a\"",
+        "inputType" -> "\"INT\""
+      ),
+      context = ExpectedContext(
+        fragment = "bitmap_count(a)",
+        start = 0,
+        stop = 14
+      )
+    )
+  }
+
+  test("bitmap_or_agg called with non-binary type") {
+    val df = Seq(12).toDF("a")
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.selectExpr("bitmap_or_agg(a)")
+      },
+      errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"bitmap_or_agg(a)\"",
+        "paramIndex" -> "0",
+        "requiredType" -> "\"BINARY\"",
+        "inputSql" -> "\"a\"",
+        "inputType" -> "\"INT\""
+      ),
+      context = ExpectedContext(
+        fragment = "bitmap_or_agg(a)",
+        start = 0,
+        stop = 15
+      )
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to