uros-db commented on code in PR #45216:
URL: https://github.com/apache/spark/pull/45216#discussion_r1498999850


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala:
##########
@@ -174,4 +174,291 @@ class CollationSuite extends QueryTest with SharedSparkSession {
           Row(expected))
     }
   }
+
+  test("Support contains string expression with Collation") {
+    // Test 'contains' with different collations
+    var listLeft: List[String] = List()
+    var listRight: List[String] = List()
+    var listResult: List[Boolean] = List()
+
+    // UCS_BASIC (default) & UNICODE collation
+    listLeft = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", "CDE", 
"ABDE", "ABCDE")
+    listRight = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", 
"CDE", "ABDE", "ABCDE")
+    listResult = List(
+    //  ""     c     abc    cde   abde  abcde    C     ABC    CDE    ABDE  
ABCDE
+      true, false, false, false, false, false, false, false, false, false, 
false, //  ""
+      true, true, false, false, false, false, false, false, false, false, 
false,  //   c
+      true, true, true, false, false, false, false, false, false, false, 
false,   // abc
+      true, true, false, true, false, false, false, false, false, false, 
false,   //   cde
+      true, false, false, false, true, false, false, false, false, false, 
false,  // abde
+      true, true, true, true, false, true, false, false, false, false, false,  
   // abcde
+      true, false, false, false, false, false, true, false, false, false, 
false,  //   C
+      true, false, false, false, false, false, true, true, false, false, 
false,   // ABC
+      true, false, false, false, false, false, true, false, true, false, 
false,   //   CDE
+      true, false, false, false, false, false, false, false, false, true, 
false,  // ABDE
+      true, false, false, false, false, false, true, true, true, false, true)  
   // ABCDE
+    for {
+      (left, index_left) <- listLeft.zipWithIndex
+      (right, index_right) <- listRight.zipWithIndex
+    } {
+      val expectedAnswer = listResult(index_left * listRight.length + 
index_right)
+      // UCS_BASIC (default)
+      checkAnswer(sql("SELECT contains('" + left + "', '" + right + "')"), 
Row(expectedAnswer))
+      // UCS_BASIC
+      checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+        right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT contains(collate('" + left + "', 'UCS_BASIC'), 
collate('" +
+        right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+      // UNICODE
+      checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+        right + "', 'UNICODE'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT contains(collate('" + left + "', 'UNICODE'), 
collate('" +
+        right + "', 'UNICODE'))"), Row(expectedAnswer))
+    }
+
+
+    // UCS_BASIC_LCASE & UNICODE_CI collation
+    listResult = List(
+    //  ""     c     abc    cde   abde  abcde    C     ABC    CDE    ABDE  
ABCDE
+      true, false, false, false, false, false, false, false, false, false, 
false, //  ""
+      true, true, false, false, false, false, true, false, false, false, 
false,   //   c
+      true, true, true, false, false, false, true, true, false, false, false,  
   // abc
+      true, true, false, true, false, false, true, false, true, false, false,  
   //    cde
+      true, false, false, false, true, false, false, false, false, true, 
false,   // abde
+      true, true, true, true, false, true, true, true, true, false, true,      
   // abcde
+      true, true, false, false, false, false, true, false, false, false, 
false,   //   C
+      true, true, true, false, false, false, true, true, false, false, false,  
   // ABC
+      true, true, false, true, false, false, true, false, true, false, false,  
   //   CDE
+      true, false, false, false, true, false, false, false, false, true, 
false,   // ABDE
+      true, true, true, true, false, true, true, true, true, false, true)      
   // ABCDE
+    for {
+      (left, index_left) <- listLeft.zipWithIndex
+      (right, index_right) <- listRight.zipWithIndex
+    } {
+      val expectedAnswer = listResult(index_left * listRight.length + 
index_right)
+      // UCS_BASIC_LCASE
+      checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+        right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT contains(collate('" + left + "', 
'UCS_BASIC_LCASE'), collate('" +
+        right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+      // UNICODE_CI
+      checkAnswer(sql("SELECT contains('" + left + "', collate('" +
+        right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT contains(collate('" + left + "', 'UNICODE_CI'), 
collate('" +
+        right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+    }
+  }
+
+    test("Support startsWith string expression with Collation") {
+    // Test 'startsWith' with different collations
+    var listLeft: List[String] = List()
+    var listRight: List[String] = List()
+    var listResult: List[Boolean] = List()
+
+    // UCS_BASIC (default) & UNICODE collation
+    listLeft = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", "CDE", 
"ABDE", "ABCDE")
+    listRight = List("", "c", "abc", "cde", "abde", "abcde", "C", "ABC", 
"CDE", "ABDE", "ABCDE")
+    listResult = List(
+    //  ""     c     abc    cde   abde  abcde    C     ABC    CDE    ABDE  
ABCDE
+      true, false, false, false, false, false, false, false, false, false, 
false, //  ""
+      true, true, false, false, false, false, false, false, false, false, 
false,  //   c
+      true, false, true, false, false, false, false, false, false, false, 
false,  // abc
+      true, true, false, true, false, false, false, false, false, false, 
false,   //   cde
+      true, false, false, false, true, false, false, false, false, false, 
false,  // abde
+      true, false, true, false, false, true, false, false, false, false, 
false,   // abcde
+      true, false, false, false, false, false, true, false, false, false, 
false,  //   C
+      true, false, false, false, false, false, false, true, false, false, 
false,  // ABC
+      true, false, false, false, false, false, true, false, true, false, 
false,   //   CDE
+      true, false, false, false, false, false, false, false, false, true, 
false,  // ABDE
+      true, false, false, false, false, false, false, true, false, false, 
true)   // ABCDE
+    for {
+      (left, index_left) <- listLeft.zipWithIndex
+      (right, index_right) <- listRight.zipWithIndex
+    } {
+      val expectedAnswer = listResult(index_left * listRight.length + 
index_right)
+      // UCS_BASIC (default)
+      checkAnswer(sql("SELECT startswith('" + left + "', '" + right + "')"), 
Row(expectedAnswer))
+      // UCS_BASIC
+      checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+        right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT startswith(collate('" + left + "', 'UCS_BASIC'), 
collate('" +
+        right + "', 'UCS_BASIC'))"), Row(expectedAnswer))
+      // UNICODE
+      checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+        right + "', 'UNICODE'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT startswith(collate('" + left + "', 'UNICODE'), 
collate('" +
+        right + "', 'UNICODE'))"), Row(expectedAnswer))
+    }
+
+    // UCS_BASIC_LCASE & UNICODE_CI collation
+    listResult = List(
+    //  ""     c     abc    cde   abde  abcde    C     ABC    CDE    ABDE  
ABCDE
+      true, false, false, false, false, false, false, false, false, false, 
false, //  ""
+      true, true, false, false, false, false, true, false, false, false, 
false,   //   c
+      true, false, true, false, false, false, false, true, false, false, 
false,   // abc
+      true, true, false, true, false, false, true, false, true, false, false,  
   //   cde
+      true, false, false, false, true, false, false, false, false, true, 
false,   // abde
+      true, false, true, false, false, true, false, true, false, false, true,  
   // abcde
+      true, true, false, false, false, false, true, false, false, false, 
false,   //   C
+      true, false, true, false, false, false, false, true, false, false, 
false,   // ABC
+      true, true, false, true, false, false, true, false, true, false, false,  
   //   CDE
+      true, false, false, false, true, false, false, false, false, true, 
false,   // ABDE
+      true, false, true, false, false, true, false, true, false, false, true)  
   // ABCDE
+    for {
+      (left, index_left) <- listLeft.zipWithIndex
+      (right, index_right) <- listRight.zipWithIndex
+    } {
+      val expectedAnswer = listResult(index_left * listRight.length + 
index_right)
+      // UCS_BASIC_LCASE
+      checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+        right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT startswith(collate('" + left + "', 
'UCS_BASIC_LCASE'), collate('" +
+        right + "', 'UCS_BASIC_LCASE'))"), Row(expectedAnswer))
+      // UNICODE_CI
+      checkAnswer(sql("SELECT startswith('" + left + "', collate('" +
+        right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+      checkAnswer(sql("SELECT startswith(collate('" + left + "', 
'UNICODE_CI'), collate('" +
+        right + "', 'UNICODE_CI'))"), Row(expectedAnswer))
+    }
+
+    // Serbian language collation tests

Review Comment:
   Of course! Removed in the new commit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to