nikolamand-db commented on code in PR #46180:
URL: https://github.com/apache/spark/pull/46180#discussion_r1606706322


##########
common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala:
##########
@@ -152,4 +219,147 @@ class CollationFactorySuite extends AnyFunSuite with 
Matchers { // scalastyle:ig
       }
     })
   }
+
+  test("test collation caching") {
+    Seq(
+      "UTF8_BINARY",
+      "UTF8_BINARY_LCASE",
+      "UTF8_BINARY_UCASE",
+      "UNICODE",
+      "UNICODE_LCASE",
+      "UNICODE_UCASE",
+      "UNICODE_CI",
+      "UNICODE_AI_CI",
+      "UNICODE_AI_CI_LCASE",
+      "UNICODE_AI_CI_UCASE"
+    ).foreach(collationId => {
+      val col1 = fetchCollation(collationId)
+      val col2 = fetchCollation(collationId)
+      assert(col1 eq col2) // reference equality
+    })
+  }
+
+  test("collations with ICU non-root localization") {
+    Seq(
+      // language only
+      "en",
+      "en_CS",
+      "en_CI",
+      "en_AS",
+      "en_AI",
+      "en_LCASE",
+      "en_UCASE",
+      // language + 3-letter country code
+      "en_USA",
+      "en_USA_CS",
+      "en_USA_CI",
+      "en_USA_AS",
+      "en_USA_AI",
+      "en_USA_LCASE",
+      "en_USA_UCASE",
+      // language + script code
+      "sr_Cyrl",
+      "sr_Cyrl_CS",
+      "sr_Cyrl_CI",
+      "sr_Cyrl_AS",
+      "sr_Cyrl_AI",
+      "sr_Cyrl_LCASE",
+      "sr_Cyrl_UCASE",
+      // language + script code + 3-letter country code
+      "sr_Cyrl_SRB",
+      "sr_Cyrl_SRB_CS",
+      "sr_Cyrl_SRB_CI",
+      "sr_Cyrl_SRB_AS",
+      "sr_Cyrl_SRB_AI",
+      "sr_Cyrl_SRB_LCASE",
+      "sr_Cyrl_SRB_UCASE"
+    ).foreach(collationICU => {
+      val col = fetchCollation(collationICU)
+      assert(col.collator.getLocale(ULocale.VALID_LOCALE) != ULocale.ROOT)
+    })
+  }
+
+  test("invalid names of collations with ICU non-root localization") {
+    Seq(
+      "en_US", // must use 3-letter country code
+      "enn",
+      "en_AAA",
+      "en_Something",
+      "en_Something_USA",
+      "en_Latn_USA", // use en_USA instead
+      "en_Cyrl_USA",
+      "en_USA_AAA",
+      "sr_Cyrl_SRB_AAA"
+    ).foreach(collationName => {
+      val error = intercept[SparkException] {
+        fetchCollation(collationName)
+      }
+
+      assert(error.getErrorClass === "COLLATION_INVALID_NAME")
+      assert(error.getMessageParameters.asScala === Map("collationName" -> 
collationName))
+    })
+  }
+
+  test("collations name normalization for ICU non-root localization") {
+    Seq(
+      ("en_USA", "en_USA"),
+      ("en_CS", "en"),
+      ("en_AS", "en"),
+      ("en_CS_AS", "en"),
+      ("en_AI_CI", "en_CI_AI"),
+      ("en_USA_AI_CI", "en_USA_CI_AI"),
+      // randomized case
+      ("EN_USA", "en_USA"),
+      ("eN_usA_ci_uCASe_aI", "en_USA_CI_AI_UCASE"),
+      ("SR_CYRL", "sr_Cyrl"),
+      ("sr_cyrl_srb", "sr_Cyrl_SRB"),
+      ("sR_cYRl_sRb", "sr_Cyrl_SRB")
+    ).foreach {
+      case (name, normalized) =>
+        val col = fetchCollation(name)
+        assert(col.collationName == normalized)
+    }
+  }
+
+  test("invalid collationId") {
+    val badCollationIds = Seq(
+      -1, // user-defined collation range

Review Comment:
   This comment is outdated; the new approach is here:
https://github.com/apache/spark/pull/46180/files#diff-9c12d32db9d55dd6ecb5b10f2fc57c7ba7de7275cab57bf157fa42cbc09f3876R331



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to