uros-db commented on code in PR #46040: URL: https://github.com/apache/spark/pull/46040#discussion_r1570223698
########## sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala: ########## @@ -212,6 +212,119 @@ class CollationStringExpressionsSuite }) } + test("Support Left/Right/Substr with collation") { + case class SubstringTestCase(query: String, collation: String, result: Row) + val checks = Seq( + SubstringTestCase( + "select substr('example' collate " + "utf8_binary_lcase" + ", 1, 100)", + "utf8_binary_lcase", + Row("example")), + SubstringTestCase( + "select substr('example' collate " + "utf8_binary" + ", 2, 2)", + "utf8_binary", + Row("xa")), + SubstringTestCase( + "select right('' collate " + "utf8_binary_lcase" + ", 1)", + "utf8_binary_lcase", + Row("")), + SubstringTestCase( + "select substr('example' collate " + "unicode" + ", 0, 0)", + "unicode", + Row("")), + SubstringTestCase( + "select substr('example' collate " + "unicode_ci" + ", -3, 2)", + "unicode_ci", + Row("pl")), + SubstringTestCase( + "select substr(' a世a ' collate " + "utf8_binary_lcase" + ", 2, 3)", // scalastyle:ignore + "utf8_binary_lcase", + Row("a世a")), // scalastyle:ignore + SubstringTestCase( + "select left(' a世a ' collate " + "utf8_binary" + ", 3)", // scalastyle:ignore + "utf8_binary", + Row(" a世")), // scalastyle:ignore + SubstringTestCase( + "select right(' a世a ' collate " + "unicode" + ", 3)", // scalastyle:ignore + "unicode", + Row("世a ")), // scalastyle:ignore + SubstringTestCase( + "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode_ci" + ", 3)", // scalastyle:ignore + "unicode_ci", + Row("ÀÃÂ")), // scalastyle:ignore + SubstringTestCase( + "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" + ", 3)", // scalastyle:ignore + "utf8_binary_lcase", + Row("ǢǼÆ")), // scalastyle:ignore + SubstringTestCase( + "select substr('' collate " + "utf8_binary_lcase" + ", 1, 1)", + "utf8_binary_lcase", + Row("")), + SubstringTestCase( + "select substr('' collate " + "unicode" + ", 1, 1)", + "unicode", + Row("")), + SubstringTestCase( + "select 
left('' collate " + "utf8_binary" + ", 1)", + "utf8_binary", + Row("")), + // improper values + SubstringTestCase( + "select left(null collate " + "utf8_binary_lcase" + ", 1)", + "utf8_binary_lcase", + Row(null)), + SubstringTestCase( + "select right(null collate " + "unicode" + ", 1)", + "unicode", + Row(null)), + SubstringTestCase( + "select substr(null collate " + "utf8_binary" + ", 1)", + "utf8_binary", + Row(null)), + SubstringTestCase( + "select substr(null collate " + "unicode_ci" + ", 1, 1)", + "unicode_ci", + Row(null)), + SubstringTestCase( + "select left(null collate " + "utf8_binary_lcase" + ", null)", + "utf8_binary_lcase", + Row(null)), + SubstringTestCase( + "select right(null collate " + "unicode" + ", null)", + "unicode", + Row(null)), + SubstringTestCase( + "select substr(null collate " + "utf8_binary" + ", null)", + "utf8_binary", + Row(null)), + SubstringTestCase( + "select substr(null collate " + "unicode_ci" + ", null, null)", + "unicode_ci", + Row(null)), + SubstringTestCase( + "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" + ", null)", // scalastyle:ignore + "utf8_binary_lcase", + Row(null)), + SubstringTestCase( + "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode" + ", null)", // scalastyle:ignore + "unicode", + Row(null)), + SubstringTestCase( + "select substr('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary" + ", null)", // scalastyle:ignore + "utf8_binary", + Row(null)), + SubstringTestCase( + "select substr('' collate " + "unicode_ci" + ", null, null)", + "unicode_ci", + Row(null)) Review Comment: It's fine if there's a parameter that isn't used in some cases; I don't think that causes any error. Otherwise, you can always introduce `LeftRightTestCase` - whatever works. Quantity is fine here; what's important is proper coverage without needless repetition. These e2e SQL tests are pretty slow, so having hundreds of them for trivial expressions is less than ideal. -- This is an automated message from the Apache 
Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org