uros-db commented on code in PR #46040:
URL: https://github.com/apache/spark/pull/46040#discussion_r1570223698


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -212,6 +212,119 @@ class CollationStringExpressionsSuite
     })
   }
 
+  test("Support Left/Right/Substr with collation") {
+    case class SubstringTestCase(query: String, collation: String, result: Row)
+    val checks = Seq(
+      SubstringTestCase(
+        "select substr('example' collate " + "utf8_binary_lcase" + ", 1, 100)",
+        "utf8_binary_lcase",
+        Row("example")),
+      SubstringTestCase(
+        "select substr('example' collate " + "utf8_binary" + ", 2, 2)",
+        "utf8_binary",
+        Row("xa")),
+      SubstringTestCase(
+        "select right('' collate " + "utf8_binary_lcase" + ", 1)",
+        "utf8_binary_lcase",
+        Row("")),
+      SubstringTestCase(
+        "select substr('example' collate " + "unicode" + ", 0, 0)",
+        "unicode",
+        Row("")),
+      SubstringTestCase(
+        "select substr('example' collate " + "unicode_ci" + ", -3, 2)",
+        "unicode_ci",
+        Row("pl")),
+      SubstringTestCase(
+        "select substr(' a世a ' collate " + "utf8_binary_lcase" + ", 2, 3)", // 
scalastyle:ignore
+        "utf8_binary_lcase",
+        Row("a世a")), // scalastyle:ignore
+      SubstringTestCase(
+        "select left(' a世a ' collate " + "utf8_binary" + ", 3)", // 
scalastyle:ignore
+        "utf8_binary",
+        Row(" a世")), // scalastyle:ignore
+      SubstringTestCase(
+        "select right(' a世a ' collate " + "unicode" + ", 3)", // 
scalastyle:ignore
+        "unicode",
+        Row("世a ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode_ci" + ", 
3)", // scalastyle:ignore
+        "unicode_ci",
+        Row("ÀÃÂ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" 
+ ", 3)", // scalastyle:ignore
+        "utf8_binary_lcase",
+        Row("ǢǼÆ")), // scalastyle:ignore
+      SubstringTestCase(
+        "select substr('' collate " + "utf8_binary_lcase" + ", 1, 1)",
+        "utf8_binary_lcase",
+        Row("")),
+      SubstringTestCase(
+        "select substr('' collate " + "unicode" + ", 1, 1)",
+        "unicode",
+        Row("")),
+      SubstringTestCase(
+        "select left('' collate " + "utf8_binary" + ", 1)",
+        "utf8_binary",
+        Row("")),
+        // improper values
+      SubstringTestCase(
+        "select left(null collate " + "utf8_binary_lcase" + ", 1)",
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right(null collate " + "unicode" + ", 1)",
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "utf8_binary" + ", 1)",
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "unicode_ci" + ", 1, 1)",
+        "unicode_ci",
+        Row(null)),
+      SubstringTestCase(
+        "select left(null collate " + "utf8_binary_lcase" + ", null)",
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right(null collate " + "unicode" + ", null)",
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "utf8_binary" + ", null)",
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr(null collate " + "unicode_ci" + ", null, null)",
+        "unicode_ci",
+        Row(null)),
+      SubstringTestCase(
+        "select left('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary_lcase" + 
", null)", // scalastyle:ignore
+        "utf8_binary_lcase",
+        Row(null)),
+      SubstringTestCase(
+        "select right('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "unicode" + ", 
null)", // scalastyle:ignore
+        "unicode",
+        Row(null)),
+      SubstringTestCase(
+        "select substr('ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ' collate " + "utf8_binary" + ", 
null)", // scalastyle:ignore
+        "utf8_binary",
+        Row(null)),
+      SubstringTestCase(
+        "select substr('' collate " + "unicode_ci" + ", null, null)",
+        "unicode_ci",
+        Row(null))

Review Comment:
   It's fine if there's a parameter that isn't used in some cases; I don't 
think that causes any error.
   
   Otherwise, you can always introduce `LeftRightTestCase`; whatever works.
   
   Quantity is fine here; what's important is proper coverage without needless 
repetition. These e2e SQL tests are pretty slow, so having hundreds of them for 
trivial expressions is less than ideal.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to