This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 91da2caa409c [SPARK-48263] Collate function support for non 
UTF8_BINARY strings
91da2caa409c is described below

commit 91da2caa409cb156a970fea0fc8355fcd8c6a2e6
Author: Nebojsa Savic <nebojsa.sa...@databricks.com>
AuthorDate: Tue May 14 23:39:26 2024 +0800

    [SPARK-48263] Collate function support for non UTF8_BINARY strings
    
    ### What changes were proposed in this pull request?
    collate("xx", "<non default>") does not work when the default collation
    config is set to a collation other than UTF8_BINARY. This PR makes the
    collate function work in that case.
    
    ### Why are the changes needed?
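    To fix the incompatibility between the default collation config and the
    collate function. The expression builder matched the collation-name
    argument only against the `StringType` object, so a string argument with
    any other collation fell through to the unexpected-input-data-type error;
    it now matches any `StringType` instance.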
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. Customers will be able to execute the collate(<string>, <collation>)
    function even when the default collation config is set to a collation other
    than UTF8_BINARY. This expands the supported surface area for customers.
    
    ### How was this patch tested?
    Added tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #46574 from nebojsa-db/SPARK-48263.
    
    Authored-by: Nebojsa Savic <nebojsa.sa...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/expressions/collationExpressions.scala    |  4 ++--
 .../test/scala/org/apache/spark/sql/CollationSuite.scala   | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
index 6af00e193d94..7c02475a60ad 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
@@ -57,14 +57,14 @@ object CollateExpressionBuilder extends ExpressionBuilder {
     expressions match {
       case Seq(e: Expression, collationExpr: Expression) =>
         (collationExpr.dataType, collationExpr.foldable) match {
-          case (StringType, true) =>
+          case (_: StringType, true) =>
             val evalCollation = collationExpr.eval()
             if (evalCollation == null) {
               throw QueryCompilationErrors.unexpectedNullError("collation", 
collationExpr)
             } else {
               Collate(e, evalCollation.toString)
             }
-          case (StringType, false) => throw 
QueryCompilationErrors.nonFoldableArgumentError(
+          case (_: StringType, false) => throw 
QueryCompilationErrors.nonFoldableArgumentError(
             funcName, "collationName", StringType)
           case (_, _) => throw 
QueryCompilationErrors.unexpectedInputDataTypeError(
             funcName, 1, StringType, collationExpr)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index fce9ad3cc184..b22a762a2954 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -67,8 +67,18 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
   }
 
   test("collate function syntax") {
-    assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType == 
StringType(0))
-    assert(sql(s"select collate('aaa', 
'utf8_binary_lcase')").schema(0).dataType == StringType(1))
+    assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType ==
+      StringType("UTF8_BINARY"))
+    assert(sql(s"select collate('aaa', 
'utf8_binary_lcase')").schema(0).dataType ==
+      StringType("UTF8_BINARY_LCASE"))
+  }
+
+  test("collate function syntax with default collation set") {
+    withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_BINARY_LCASE") {
+      assert(sql(s"select collate('aaa', 
'utf8_binary_lcase')").schema(0).dataType ==
+        StringType("UTF8_BINARY_LCASE"))
+      assert(sql(s"select collate('aaa', 'UNICODE')").schema(0).dataType == 
StringType("UNICODE"))
+    }
   }
 
   test("collate function syntax invalid arg count") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to