This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 11bc5ce19ef [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes 11bc5ce19ef is described below commit 11bc5ce19ef5db9e4c9c6925a31a85070a0c0495 Author: lvshaokang <lvshaoka...@gmail.com> AuthorDate: Tue Oct 18 18:19:12 2022 +0500 [SPARK-40368][SQL] Migrate Bloom Filter type check failures onto error classes ### What changes were proposed in this pull request? In the PR, I propose to use error classes in the case of type check failure in Bloom Filter expressions. ### Why are the changes needed? Migration onto error classes unifies Spark SQL error messages. ### Does this PR introduce _any_ user-facing change? Yes. The PR changes user-facing error messages. ### How was this patch tested? ``` build/sbt "sql/testOnly *SQLQueryTestSuite" build/sbt "test:testOnly org.apache.spark.SparkThrowableSuite" build/sbt "test:testOnly *BloomFilterAggregateQuerySuite" ``` Closes #38251 from lvshaokang/SPARK-40368. Authored-by: lvshaokang <lvshaoka...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 10 +++ .../expressions/BloomFilterMightContain.scala | 25 ++++++-- .../spark/sql/BloomFilterAggregateQuerySuite.scala | 72 +++++++++++++++++++--- 3 files changed, 92 insertions(+), 15 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1474d800f72..3ffbedff4c9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -101,6 +101,16 @@ "the binary operator requires the input type <inputType>, not <actualDataType>." ] }, + "BLOOM_FILTER_BINARY_OP_WRONG_TYPE" : { + "message" : [ + "The Bloom filter binary input to <functionName> should be either a constant value or a scalar subquery expression, but it's <actual>." + ] + }, + "BLOOM_FILTER_WRONG_TYPE" : { + "message" : [ + "Input to function <functionName> should have been <expectedLeft> followed by a value with <expectedRight>, but it's [<actualLeft>, <actualRight>]." + ] + }, "CANNOT_CONVERT_TO_JSON" : { "message" : [ "Unable to convert column <name> of type <type> to JSON." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala index 24d1dd69d9d..5cb19d36b80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala @@ -21,6 +21,8 @@ import java.io.ByteArrayInputStream import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch +import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLId, toSQLType} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, JavaCode, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE @@ -59,12 +61,25 @@ case class BloomFilterMightContain( if !subquery.containsPattern(OUTER_REFERENCE) => TypeCheckResult.TypeCheckSuccess case _ => - TypeCheckResult.TypeCheckFailure(s"The Bloom filter binary input to $prettyName " + - "should be either a constant value or a scalar subquery expression") + DataTypeMismatch( + errorSubClass = "BLOOM_FILTER_BINARY_OP_WRONG_TYPE", + messageParameters = Map( + "functionName" -> toSQLId(prettyName), + "actual" -> toSQLExpr(bloomFilterExpression) + ) + ) } - case _ => TypeCheckResult.TypeCheckFailure(s"Input to function $prettyName should have " + - s"been ${BinaryType.simpleString} followed by a value with ${LongType.simpleString}, " + - s"but it's [${left.dataType.catalogString}, ${right.dataType.catalogString}].") + case _ => + DataTypeMismatch( + errorSubClass = "BLOOM_FILTER_WRONG_TYPE", + messageParameters = Map( + "functionName" -> toSQLId(prettyName), + "expectedLeft" -> toSQLType(BinaryType), + "expectedRight" -> toSQLType(LongType), + "actualLeft" -> toSQLType(left.dataType), + "actualRight" -> toSQLType(right.dataType) + ) + ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala index 05513cddccb..6a22414db00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -152,15 +152,45 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { spark.sql("""|SELECT might_contain(1.0, 1L)""" .stripMargin) } - assert(exception1.getMessage.contains( - "Input to function might_contain should have been binary followed by a value with bigint")) + checkError( + exception = exception1, + errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_WRONG_TYPE", + parameters = Map( + "sqlExpr" -> "\"might_contain(1.0, 1)\"", + "functionName" -> "`might_contain`", + "expectedLeft" -> "\"BINARY\"", + "expectedRight" -> "\"BIGINT\"", + "actualLeft" -> "\"DECIMAL(2,1)\"", + "actualRight" -> "\"BIGINT\"" + ), + context = ExpectedContext( + fragment = "might_contain(1.0, 1L)", + start = 7, + stop = 28 + ) + ) val exception2 = intercept[AnalysisException] { spark.sql("""|SELECT might_contain(NULL, 0.1)""" .stripMargin) } - assert(exception2.getMessage.contains( - "Input to function might_contain should have been binary followed by a value with bigint")) + checkError( + exception = exception2, + errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_WRONG_TYPE", + parameters = Map( + "sqlExpr" -> "\"might_contain(NULL, 0.1)\"", + "functionName" -> "`might_contain`", + "expectedLeft" -> "\"BINARY\"", + "expectedRight" -> "\"BIGINT\"", + "actualLeft" -> "\"VOID\"", + "actualRight" -> "\"DECIMAL(1,1)\"" + ), + context = ExpectedContext( + fragment = "might_contain(NULL, 0.1)", + start = 7, + stop = 30 + ) + ) } test("Test that might_contain errors out non-constant Bloom filter") { @@ -170,9 +200,20 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { |FROM values (cast(1 as string)), (cast(2 as string)) as t(a)""" .stripMargin) } - assert(exception1.getMessage.contains( - "The Bloom filter binary input to might_contain should be either a constant value or " + - "a scalar subquery expression")) + checkError( + exception = exception1, + errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_BINARY_OP_WRONG_TYPE", + parameters = Map( + "sqlExpr" -> "\"might_contain(CAST(a AS BINARY), CAST(5 AS BIGINT))\"", + "functionName" -> "`might_contain`", + "actual" -> "\"CAST(a AS BINARY)\"" + ), + context = ExpectedContext( + fragment = "might_contain(cast(a as binary), cast(5 as long))", + start = 8, + stop = 56 + ) + ) val exception2 = intercept[AnalysisException] { spark.sql(""" @@ -180,9 +221,20 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { |FROM values (cast(1 as string)), (cast(2 as string)) as t(a)""" .stripMargin) } - assert(exception2.getMessage.contains( - "The Bloom filter binary input to might_contain should be either a constant value or " + - "a scalar subquery expression")) + checkError( + exception = exception2, + errorClass = "DATATYPE_MISMATCH.BLOOM_FILTER_BINARY_OP_WRONG_TYPE", + parameters = Map( + "sqlExpr" -> "\"might_contain(scalarsubquery(a), CAST(5 AS BIGINT))\"", + "functionName" -> "`might_contain`", + "actual" -> "\"scalarsubquery(a)\"" + ), + context = ExpectedContext( + fragment = "might_contain((select cast(a as binary)), cast(5 as long))", + start = 8, + stop = 65 + ) + ) } test("Test that might_contain can take a constant value input") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org