uros-db commented on code in PR #45933:
URL: https://github.com/apache/spark/pull/45933#discussion_r1567181125


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -161,6 +161,78 @@ class CollationStringExpressionsSuite
     })
   }
 
+  test("Ascii & UnBase64 string expressions with collation") {
+    case class AsciiUnBase64TestCase[R](q: String, dt: DataType, r: R)
+    val testCases = Seq(
+      AsciiUnBase64TestCase("select ascii('a' collate utf8_binary)", 
IntegerType, Row(97)),
+      AsciiUnBase64TestCase("select ascii('a' collate utf8_binary_lcase)", 
IntegerType, Row(97)),
+      AsciiUnBase64TestCase("select unbase64('YQ==' collate utf8_binary)", 
BinaryType,
+        Row(Seq(97))),
+      AsciiUnBase64TestCase("select unbase64('YQ==' collate utf8_binary_lcase)", BinaryType,
+        Row(Seq(97)))
+    )
+    testCases.foreach(t => {
+      // Result & data type
+      checkAnswer(sql(t.q), t.r)
+      assert(sql(t.q).schema.fields.head.dataType.sameType(t.dt))
+    })
+  }
+
+  test("Chr, Base64, Decode & FormatNumber string expressions with collation") 
{
+    case class DefaultCollationTestCase[R](q: String, c: String, r: R)
+    val testCases = Seq(
+      DefaultCollationTestCase("select chr(97)", "UTF8_BINARY", Row("a")),
+      DefaultCollationTestCase("select chr(97)", "UTF8_BINARY_LCASE", 
Row("a")),
+      DefaultCollationTestCase("select base64('a')", "UTF8_BINARY", 
Row("YQ==")),
+      DefaultCollationTestCase("select base64('a')", "UTF8_BINARY_LCASE", 
Row("YQ==")),
+      DefaultCollationTestCase("select decode(encode('a', 'utf-8'), 'utf-8')", 
"UTF8_BINARY",
+        Row("a")),
+      DefaultCollationTestCase("select decode(encode('a', 'utf-8'), 'utf-8')",
+        "UTF8_BINARY_LCASE", Row("a")),
+      DefaultCollationTestCase("select format_number(123.123, '###.###')", 
"UTF8_BINARY",
+        Row("123.123")),
+      DefaultCollationTestCase("select format_number(123.123, '###.###')", 
"UTF8_BINARY_LCASE",
+        Row("123.123"))
+    )
+    testCases.foreach(t => {
+      withSQLConf(SQLConf.DEFAULT_COLLATION.key -> t.c) {
+        // Result & data type
+        checkAnswer(sql(t.q), t.r)
+        assert(sql(t.q).schema.fields.head.dataType.sameType(StringType(t.c)))
+      }
+    })
+  }
+
+  test("Encode, ToBinary & Sentences string expressions with collation") {
+    case class EncodeToBinarySentencesTestCase[R](q: String, dt: DataType, r: 
R)
+    val testCases = Seq(
+      EncodeToBinarySentencesTestCase("select encode('a' collate utf8_binary, 
'utf-8')",
+        BinaryType, Row(Seq(97))),
+      EncodeToBinarySentencesTestCase("select encode('a' collate utf8_binary_lcase, 'utf-8')",
+        BinaryType, Row(Seq(97))),

Review Comment:
   Since we are already very conservative with these e2e SQL tests, let's at
least set the standard of making them a bit more interesting (vary case/accent,
use some variable-length characters, etc.) instead of using 'a' again.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to