This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d0d4aab4378 [SPARK-44154][SQL][FOLLOWUP] `BitmapCount` and `BitmapOrAgg` should use `DataTypeMismatch` to indicate unexpected input data type d0d4aab4378 is described below commit d0d4aab437843ce5adf5900d2d6088e79323f8d5 Author: Bruce Robbins <bersprock...@gmail.com> AuthorDate: Wed Jul 26 19:31:27 2023 +0800 [SPARK-44154][SQL][FOLLOWUP] `BitmapCount` and `BitmapOrAgg` should use `DataTypeMismatch` to indicate unexpected input data type ### What changes were proposed in this pull request? Change `BitmapCount` and `BitmapOrAgg` to use `DataTypeMismatch` rather than `TypeCheckResult.TypeCheckFailure` to indicate incorrect input types. ### Why are the changes needed? It appears that `TypeCheckResult.TypeCheckFailure` is effectively deprecated: no expressions other than the recently added `BitmapCount` and `BitmapOrAgg` use it. ### Does this PR introduce _any_ user-facing change? This PR changes the error message of two expressions that are not yet in any released version of Spark. 
Before PR: ``` spark-sql (default)> select bitmap_count(12); [DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve "bitmap_count(12)" due to data type mismatch: Bitmap must be a BinaryType.; line 1 pos 7; 'Project [unresolvedalias(bitmap_count(12), None)] +- OneRowRelation spark-sql (default)> select bitmap_or_agg(12); [DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT] Cannot resolve "bitmap_or_agg(12)" due to data type mismatch: Bitmap must be a BinaryType.; line 1 pos 7; 'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)] +- OneRowRelation ``` After PR: ``` spark-sql (default)> select bitmap_count(12); [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "bitmap_count(12)" due to data type mismatch: Parameter 0 requires the "BINARY" type, however "12" has the type "INT".; line 1 pos 7; 'Project [unresolvedalias(bitmap_count(12), None)] +- OneRowRelation spark-sql (default)> select bitmap_or_agg(12); [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "bitmap_or_agg(12)" due to data type mismatch: Parameter 0 requires the "BINARY" type, however "12" has the type "INT".; line 1 pos 7; 'Aggregate [unresolvedalias(bitmap_or_agg(12, 0, 0), None)] +- OneRowRelation ``` ### How was this patch tested? New unit tests. Closes #42139 from bersprockets/bitmap_type_check. 
Authored-by: Bruce Robbins <bersprock...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../catalyst/expressions/bitmapExpressions.scala | 26 +++++++++++-- .../spark/sql/BitmapExpressionsQuerySuite.scala | 44 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala index 2adfddb9383..5c7ef5cde5b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitmapExpressions.scala @@ -19,10 +19,12 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.aggregate.ImperativeAggregate import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, LongType, StructType} @@ -111,9 +113,17 @@ case class BitmapCount(child: Expression) override def checkInputDataTypes(): TypeCheckResult = { if (child.dataType != BinaryType) { - TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType") + DataTypeMismatch( + errorSubClass = "UNEXPECTED_INPUT_TYPE", + messageParameters = Map( + "paramIndex" -> "0", + "requiredType" -> toSQLType(BinaryType), + "inputSql" -> toSQLExpr(child), + "inputType" -> toSQLType(child.dataType) + ) + ) } else { - TypeCheckResult.TypeCheckSuccess + 
TypeCheckSuccess } } @@ -248,9 +258,17 @@ case class BitmapOrAgg(child: Expression, override def checkInputDataTypes(): TypeCheckResult = { if (child.dataType != BinaryType) { - TypeCheckResult.TypeCheckFailure("Bitmap must be a BinaryType") + DataTypeMismatch( + errorSubClass = "UNEXPECTED_INPUT_TYPE", + messageParameters = Map( + "paramIndex" -> "0", + "requiredType" -> toSQLType(BinaryType), + "inputSql" -> toSQLExpr(child), + "inputType" -> toSQLType(child.dataType) + ) + ) } else { - TypeCheckResult.TypeCheckSuccess + TypeCheckSuccess } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala index 6baca72a14e..74c744635ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BitmapExpressionsQuerySuite.scala @@ -207,4 +207,48 @@ class BitmapExpressionsQuerySuite extends QueryTest with SharedSparkSession { Seq(Row("700000")) ) } + + test("bitmap_count called with non-binary type") { + val df = Seq(12).toDF("a") + checkError( + exception = intercept[AnalysisException] { + df.selectExpr("bitmap_count(a)") + }, + errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + parameters = Map( + "sqlExpr" -> "\"bitmap_count(a)\"", + "paramIndex" -> "0", + "requiredType" -> "\"BINARY\"", + "inputSql" -> "\"a\"", + "inputType" -> "\"INT\"" + ), + context = ExpectedContext( + fragment = "bitmap_count(a)", + start = 0, + stop = 14 + ) + ) + } + + test("bitmap_or_agg called with non-binary type") { + val df = Seq(12).toDF("a") + checkError( + exception = intercept[AnalysisException] { + df.selectExpr("bitmap_or_agg(a)") + }, + errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + parameters = Map( + "sqlExpr" -> "\"bitmap_or_agg(a)\"", + "paramIndex" -> "0", + "requiredType" -> "\"BINARY\"", + "inputSql" -> "\"a\"", + "inputType" -> "\"INT\"" + ), + context = 
ExpectedContext( + fragment = "bitmap_or_agg(a)", + start = 0, + stop = 15 + ) + ) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org