This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 91da2caa409c [SPARK-48263] Collate function support for non UTF8_BINARY strings 91da2caa409c is described below commit 91da2caa409cb156a970fea0fc8355fcd8c6a2e6 Author: Nebojsa Savic <nebojsa.sa...@databricks.com> AuthorDate: Tue May 14 23:39:26 2024 +0800 [SPARK-48263] Collate function support for non UTF8_BINARY strings ### What changes were proposed in this pull request? collate("xx", "<non default>") does not work when the default collation config is set to a collation other than UTF8_BINARY. ### Why are the changes needed? To fix the compatibility issue between the default collation config and the collate function. ### Does this PR introduce _any_ user-facing change? Yes. Users will be able to execute the collate(<string>, <collation>) function even when the default collation config is set to a collation other than UTF8_BINARY. This expands the supported surface area for users. ### How was this patch tested? Added tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46574 from nebojsa-db/SPARK-48263. 
Authored-by: Nebojsa Savic <nebojsa.sa...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/expressions/collationExpressions.scala | 4 ++-- .../test/scala/org/apache/spark/sql/CollationSuite.scala | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala index 6af00e193d94..7c02475a60ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala @@ -57,14 +57,14 @@ object CollateExpressionBuilder extends ExpressionBuilder { expressions match { case Seq(e: Expression, collationExpr: Expression) => (collationExpr.dataType, collationExpr.foldable) match { - case (StringType, true) => + case (_: StringType, true) => val evalCollation = collationExpr.eval() if (evalCollation == null) { throw QueryCompilationErrors.unexpectedNullError("collation", collationExpr) } else { Collate(e, evalCollation.toString) } - case (StringType, false) => throw QueryCompilationErrors.nonFoldableArgumentError( + case (_: StringType, false) => throw QueryCompilationErrors.nonFoldableArgumentError( funcName, "collationName", StringType) case (_, _) => throw QueryCompilationErrors.unexpectedInputDataTypeError( funcName, 1, StringType, collationExpr) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index fce9ad3cc184..b22a762a2954 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -67,8 +67,18 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } test("collate 
function syntax") { - assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType == StringType(0)) - assert(sql(s"select collate('aaa', 'utf8_binary_lcase')").schema(0).dataType == StringType(1)) + assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType == + StringType("UTF8_BINARY")) + assert(sql(s"select collate('aaa', 'utf8_binary_lcase')").schema(0).dataType == + StringType("UTF8_BINARY_LCASE")) + } + + test("collate function syntax with default collation set") { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_BINARY_LCASE") { + assert(sql(s"select collate('aaa', 'utf8_binary_lcase')").schema(0).dataType == + StringType("UTF8_BINARY_LCASE")) + assert(sql(s"select collate('aaa', 'UNICODE')").schema(0).dataType == StringType("UNICODE")) + } } test("collate function syntax invalid arg count") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org