mihailom-db commented on code in PR #45819: URL: https://github.com/apache/spark/pull/45819#discussion_r1555542892
########## sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala: ########## @@ -645,6 +646,34 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { }, errorClass = "COLLATION_MISMATCH.IMPLICIT" ) + + // check if substring passes through implicit collation + checkError( + exception = intercept[AnalysisException] { + sql(s"SELECT substr('a' COLLATE UNICODE, 0, 1) == substr('b' COLLATE UNICODE_CI, 0, 1)") + }, + errorClass = "COLLATION_MISMATCH.IMPLICIT" + ) + + checkAnswer(spark.sql("SELECT collation(:var1 || :var2)", + Map( + "var1" -> Literal.create("a", StringType(1)), + "var2" -> Literal.create("b", StringType(2)) + ) + ), + Seq(Row("UTF8_BINARY")) + ) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + checkAnswer(spark.sql("SELECT collation(:var1 || :var2)", + Map( + "var1" -> Literal.create("a", StringType(1)), + "var2" -> Literal.create("b", StringType(2)) + ) + ), + Seq(Row("UNICODE")) + ) + } Review Comment: @srielau Is this the expected behaviour? Apparently we can pass different collations to parameters. My understanding was that if a StringType has default priority, it must carry the session-level default collation; otherwise we could end up with different collations that share the same default priority, which is not covered by the design. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org