nikolamand-db commented on code in PR #45856: URL: https://github.com/apache/spark/pull/45856#discussion_r1565804085
########## sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala: ########## @@ -116,26 +116,37 @@ class CollationRegexpExpressionsSuite test("Support StringSplit string expression with collation") { // Supported collations - case class StringSplitTestCase[R](l: String, r: String, c: String, result: R) + case class StringSplitTestCase[R](l: String, r: String, c: String, result: R, limit: Int = -1) val testCases = Seq( - StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")) + StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")), + StringSplitTestCase("ABC", "[b]", "UTF8_BINARY", Seq("ABC")), + StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C")), + StringSplitTestCase("AAA", "[a]", "UTF8_BINARY_LCASE", Seq("", "", "", "")), + StringSplitTestCase("AAA", "[b]", "UTF8_BINARY_LCASE", Seq("AAA")), + StringSplitTestCase("aAbB", "[ab]", "UTF8_BINARY_LCASE", Seq("", "", "", "", "")), + StringSplitTestCase("", "", "UTF8_BINARY_LCASE", Seq("")), + StringSplitTestCase("", "[a]", "UTF8_BINARY_LCASE", Seq("")), + StringSplitTestCase("xAxBxaxbx", "[AB]", "UTF8_BINARY_LCASE", Seq("x", "x", "x", "x", "x")), + StringSplitTestCase("ABC", "", "UTF8_BINARY_LCASE", Seq("A", "B", "C")), + // test split with limit + StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("ABC"), 1), + StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 2), + StringSplitTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", Seq("A", "C"), 3), + StringSplitTestCase("ABC", "[B]", "UNICODE", Seq("A", "C")), + StringSplitTestCase("ABC", "[b]", "UNICODE", Seq("ABC")) ) testCases.foreach(t => { - val query = s"SELECT split(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" + val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}', ${t.limit})" // Result & data type checkAnswer(sql(query), Row(t.result)) assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c)))) // TODO: Implicit casting (not 
currently supported) Review Comment: Removed both TODOs: string split has no custom collation cast logic, because the collation of the regex parameter is irrelevant. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org