[phoenix] 03/03: PHOENIX-6818 Remove dependency on the i18n-util library

stoty Fri, 18 Nov 2022 02:08:33 -0800

This is an automated email from the ASF dual-hosted git repository.

stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix.git


commit e4861a3fe9e972a85cd7bd431ab886d19168a15f
Author: Mate Szalay-Beko <sy...@apache.org>
AuthorDate: Thu Nov 10 16:11:49 2022 +0100

    PHOENIX-6818 Remove dependency on the i18n-util library
    
    i18n-util is not maintained anymore, but uses icu4j dependencies having CVE
    issues. To avoid these problems, I copied the relevant code from
    i18n-util and used the latest icu4j version.
---
 dev/release_files/LICENSE                          |    2 +-
 phoenix-core/pom.xml                               |    8 +-
 .../expression/function/CollationKeyFunction.java  |   12 +-
 .../phoenix/expression/function/LowerFunction.java |    3 +-
 .../phoenix/expression/function/UpperFunction.java |   21 +-
 .../apache/phoenix/util/DeferredStringBuilder.java |  135 +++
 .../apache/phoenix/util/i18n/LinguisticSort.java   | 1172 ++++++++++++++++++++
 .../org/apache/phoenix/util/i18n/LocaleUtils.java  |   86 ++
 .../org/apache/phoenix/util/i18n/OracleUpper.java  |   82 ++
 .../apache/phoenix/util/i18n/OracleUpperTable.java |  337 ++++++
 .../org/apache/phoenix/util/i18n/package-info.java |   27 +
 .../phoenix/util/i18n/LinguisticSortTest.java      |  650 +++++++++++
 .../util/i18n/OracleUpperTableGeneratorTest.java   |  391 +++++++
 pom.xml                                            |   13 +-
 14 files changed, 2912 insertions(+), 27 deletions(-)

diff --git a/dev/release_files/LICENSE b/dev/release_files/LICENSE
index 4577518c7a..c3c68268f8 100644
--- a/dev/release_files/LICENSE
+++ b/dev/release_files/LICENSE
@@ -254,7 +254,7 @@ Janino Compiler (https://github.com/janino-compiler/janino)
 
 Hamcrest-core 1.3 (http://www.hamcrest.org) Copyright (c) 2000-2006, 
www.hamcrest.org
 
-i18n-util 1.0.1 (https://github.com/salesforce/i18n-util) Copyright (c) 2017, 
Salesforce.com, Inc. All rights reserved.
+icu4j (https://github.com/unicode-org/icu) Copyright (c) 2016 and later 
Unicode, Inc. and others. All Rights Reserved.
 
 ---
 
diff --git a/phoenix-core/pom.xml b/phoenix-core/pom.xml
index 3df5ef68a2..f1079105d3 100644
--- a/phoenix-core/pom.xml
+++ b/phoenix-core/pom.xml
@@ -549,8 +549,12 @@
       <artifactId>stream</artifactId>
     </dependency>
     <dependency>
-      <groupId>com.salesforce.i18n</groupId>
-      <artifactId>i18n-util</artifactId>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j-localespi</artifactId>
     </dependency>
     <dependency>
       <groupId>com.lmax</groupId>
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
index f5cbdc4557..676b6460df 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
@@ -35,11 +35,11 @@ import org.apache.phoenix.schema.types.PInteger;
 import org.apache.phoenix.schema.types.PVarbinary;
 import org.apache.phoenix.schema.types.PVarchar;
 import org.apache.phoenix.util.VarBinaryFormatter;
+import org.apache.phoenix.util.i18n.LinguisticSort;
+import org.apache.phoenix.util.i18n.LocaleUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.force.db.i18n.LinguisticSort;
-import com.force.i18n.LocaleUtils;
 
 /**
  * A Phoenix Function that calculates a collation key for an input string based
@@ -51,10 +51,12 @@ import com.force.i18n.LocaleUtils;
  * are all valid locale representations. Note the language code, country code
  * and variant are used as arguments to the constructor of java.util.Locale.
  *
- * This function uses the open-source i18n-util package to obtain the collators
- * it needs from the provided locale.
+ * This function originally used the open-source i18n-util package to obtain 
the
+ * collators it needs from the provided locale. As i18n-util is not maintained
+ * anymore, the relevant parts from it were copied into Phoenix.
+ * See: https://issues.apache.org/jira/browse/PHOENIX-6818
  *
- * The LinguisticSort implementation in i18n-util encapsulates sort-related
+ * The LinguisticSort implementation from i18n-util encapsulates sort-related
  * functionality for a substantive list of locales. For each locale, it 
provides
  * a collator and an Oracle-specific database function that can be used to sort
  * strings according to the natural language rules of that locale.
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java
index f444d36b5f..264ebfbb79 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java
@@ -30,8 +30,7 @@ import org.apache.phoenix.parse.FunctionParseNode;
 import org.apache.phoenix.schema.tuple.Tuple;
 import org.apache.phoenix.schema.types.PDataType;
 import org.apache.phoenix.schema.types.PVarchar;
-
-import com.force.i18n.LocaleUtils;
+import org.apache.phoenix.util.i18n.LocaleUtils;
 
 @FunctionParseNode.BuiltInFunction(name=LowerFunction.NAME,  args={
         @FunctionParseNode.Argument(allowedTypes={PVarchar.class}),
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java
index 0969269ba6..56a228c6dd 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java
@@ -1,11 +1,10 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -15,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.phoenix.expression.function;
 
 import java.io.DataInput;
@@ -25,15 +23,12 @@ import java.util.List;
 import java.util.Locale;
 
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-
 import org.apache.phoenix.expression.Expression;
 import org.apache.phoenix.parse.FunctionParseNode;
+import org.apache.phoenix.schema.tuple.Tuple;
 import org.apache.phoenix.schema.types.PDataType;
 import org.apache.phoenix.schema.types.PVarchar;
-
-import com.force.i18n.LocaleUtils;
-
-import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.util.i18n.LocaleUtils;
 
 @FunctionParseNode.BuiltInFunction(name=UpperFunction.NAME,  args={
         @FunctionParseNode.Argument(allowedTypes={PVarchar.class}),
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/DeferredStringBuilder.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/DeferredStringBuilder.java
new file mode 100644
index 0000000000..45dec5c22e
--- /dev/null
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/util/DeferredStringBuilder.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util;
+
+/**
+ * This utility class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * This class implements a StringBuilder that is incrementally copied from a source String.
+ * Actual creation of the new buffer is deferred until a character differs from a character at
+ * the same position in the source String.  This class is useful for reducing garbage creation
+ * when doing operations like escaping a String, when most Strings are not expected to contain
+ * any escapable characters.  In that case, no additional memory is used (as the original
+ * String is not actually copied).
+ */
+public final class DeferredStringBuilder implements Appendable, CharSequence {
+
+    /** Real buffer; stays null for as long as everything appended matches the source. */
+    private StringBuilder buf;
+    /** Number of leading source characters matched so far; only meaningful while buf is null. */
+    private int pos;
+    /** The sequence that appended characters are compared against; may be null. */
+    private final CharSequence source;
+
+    /**
+     * Creates a builder that compares appended characters against the given source.
+     *
+     * @param source the expected content; if null, a real buffer is allocated right away
+     */
+    public DeferredStringBuilder(CharSequence source) {
+        if (source == null) {
+            this.buf = new StringBuilder(16);
+        }
+        this.source = source;
+    }
+
+    /**
+     * Appends a single character, allocating the real buffer on the first mismatch with the
+     * source.
+     *
+     * @param c the character to append
+     * @return this builder
+     */
+    @Override
+    public DeferredStringBuilder append(char c) {
+        if (this.buf == null) {
+            if (this.pos < this.source.length() && c == this.source.charAt(this.pos)) {
+                // characters match - just move ahead
+                ++this.pos;
+            } else {
+                // doh - character mismatch - now we need to allocate a real StringBuilder
+                this.buf = new StringBuilder(this.source.length() + 16);
+                this.buf.append(this.source.subSequence(0, this.pos));
+                this.buf.append(c);
+            }
+        } else {
+            // we've already got the buf - just add this character
+            this.buf.append(c);
+        }
+        return this;
+    }
+
+    /**
+     * Appends a whole character sequence.
+     *
+     * A null argument is ignored.  Note that this differs from the general
+     * {@link Appendable} contract, under which null is appended as the characters "null".
+     *
+     * @param csq the sequence to append; may be null
+     * @return this builder
+     */
+    @Override
+    public DeferredStringBuilder append(CharSequence csq) {
+        if (csq == null) {
+            return this;
+        }
+        return append(csq, 0, csq.length());
+    }
+
+    /**
+     * Appends the characters of csq in the range [start, end).
+     *
+     * A null csq is ignored (see {@link #append(CharSequence)}).
+     *
+     * @param csq the sequence to take characters from; may be null
+     * @param start index of the first character to append
+     * @param end index after the last character to append
+     * @return this builder
+     */
+    @Override
+    public DeferredStringBuilder append(CharSequence csq, int start, int end) {
+        if (csq != null) {
+            if (buf == null) {
+                int chars = end - start;
+                // For small strings or overflow, do it char by char.
+                if (chars < 10 || (this.pos + chars > this.source.length())) {
+                    for (int i = start; i < end; ++i) {
+                        append(csq.charAt(i));
+                    }
+                } else {
+                    CharSequence subSeq = csq.subSequence(start, end);
+                    // Compare by content.  CharSequence does not specify equals(), so calling
+                    // equals() on two arbitrary implementations (e.g. a CharBuffer source and
+                    // a String argument) can report a mismatch for identical text and force
+                    // an unnecessary buffer allocation.
+                    if (this.source.subSequence(this.pos, this.pos + chars).toString()
+                            .contentEquals(subSeq)) {
+                        this.pos += chars;
+                    } else {
+                        this.buf = new StringBuilder(this.source.length() + 16);
+                        this.buf.append(this.source.subSequence(0, this.pos));
+                        this.buf.append(subSeq);
+                    }
+                }
+            } else {
+                // We know it's different, so just append the whole string.
+                buf.append(csq, start, end);
+            }
+        }
+        return this;
+    }
+
+    /**
+     * Returns the character at the given index of the content appended so far.
+     *
+     * @param index zero-based index into the appended content
+     * @return the character at that index
+     * @throws StringIndexOutOfBoundsException if no buffer has been allocated yet and index
+     *         is at or beyond the number of characters appended
+     */
+    @Override
+    public char charAt(int index) {
+        if (this.buf != null) {
+            return this.buf.charAt(index);
+        } else if (index < pos) {
+            return this.source.charAt(index);
+        } else {
+            throw new StringIndexOutOfBoundsException(index);
+        }
+    }
+
+    /**
+     * Returns the part of the appended content in the range [start, end).
+     *
+     * @param start index of the first character
+     * @param end index after the last character
+     * @return the requested sub-sequence
+     * @throws StringIndexOutOfBoundsException if no buffer has been allocated yet and end
+     *         is beyond the number of characters appended
+     */
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        if (this.buf != null) {
+            return this.buf.subSequence(start, end);
+        } else if (end <= pos) {
+            return this.source.subSequence(start, end);
+        } else {
+            throw new StringIndexOutOfBoundsException(end);
+        }
+    }
+
+    /**
+     * Returns the appended content as a String.  While no buffer has been allocated this is
+     * derived from the source; when the whole source was matched, source.toString() is
+     * returned directly.
+     */
+    @Override
+    public String toString() {
+        if (this.buf != null) {
+            return this.buf.toString();
+        }
+        if (this.pos == this.source.length()) {
+            return this.source.toString();
+        }
+        return this.source.subSequence(0, this.pos).toString();
+    }
+
+    /**
+     * @return the number of characters appended so far
+     */
+    @Override
+    public int length() {
+        return this.buf != null ? this.buf.length() : this.pos;
+    }
+}
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LinguisticSort.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LinguisticSort.java
new file mode 100644
index 0000000000..c1881c6440
--- /dev/null
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LinguisticSort.java
@@ -0,0 +1,1172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import java.text.CollationKey;
+import java.text.Collator;
+import java.text.MessageFormat;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.phoenix.util.DeferredStringBuilder;
+
+import com.ibm.icu.impl.jdkadapter.CollatorICU;
+import com.ibm.icu.text.AlphabeticIndex;
+import com.ibm.icu.util.ULocale;
+
+import edu.umd.cs.findbugs.annotations.SuppressWarnings;
+
+
+/**
+ * This utility class was partially copied from Salesforce's 
internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 
3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using 
vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * Contains all the information about linguistic sorting.
+ * The intent of this is to provide the SQL changes to the RDBMS to ensure
+ * that the sorting uses the locale provided in Java, and to make sure that
+ * the collation in Java will correspond as much as possible to what is in the
+ * DB.
+ *
+ * Rolodex is a feature in alphabetic/syllabary languages to restrict the set
+ * of rows in a list to those that start with a certain letter.  In SQL
+ * this is usually LIKE 'A%', which will include different letters.
+ *
+ * To get the list of valid nls_sorts, run this in oracle
+ * select value from v$nls_valid_values where parameter='SORT';
+ */
+public enum LinguisticSort {
+    // English:
+    //   Using oracle's upper() function to sort; digits come before letters,
+    //   '[' is the lowest character after 'Z'.  //  balance-]
+    ENGLISH(Locale.ENGLISH, "[", false, false, 
LinguisticSort.Alphabets.STRING), //  balance-]
+
+    // German:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    GERMAN(new Locale("de"), LinguisticSort.Alphabets.GERMAN, "0", true, false,
+           "nlssort({0}, ''nls_sort=xgerman'')"),
+
+    // French:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    FRENCH(new Locale("fr"), "0", false, false, "nlssort({0}, 
''nls_sort=xfrench'')"),
+
+    // Italian:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    ITALIAN(new Locale("it"), "0", false, false, "nlssort({0}, 
''nls_sort=italian'')"),
+
+    // Spanish:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    //   Alphabet consists of A-Z plus N-tilde.  However, CH and LL are not 
considered
+    //   letters, so do not use Oracle's xspanish nlssort.
+    SPANISH(new Locale("es"), "0", false, false, "nlssort({0}, 
''nls_sort=spanish'')"),
+
+    // Catalan:
+    //   Using oracle's nlssort() function to sort; digits come before letters,
+    //   nothing sorts after the last legal catalan character.
+    CATALAN(new Locale("ca"), LinguisticSort.Alphabets.CATALAN, "0", true, 
false,
+            "nlssort({0}, ''nls_sort=catalan'')"),
+
+    // Dutch:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    DUTCH(new Locale("nl"), "0", false, false, "nlssort({0}, 
''nls_sort=dutch'')"),
+
+    // Portuguese:
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    PORTUGUESE(new Locale("pt"), "0", false, false, "nlssort({0}, 
''nls_sort=west_european'')"),
+
+    // Danish:
+    //   Alphabet consists of A-Z followed by AE, O-stroke, and A-ring.
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    DANISH(new Locale("da"), "0", false, false, "nlssort({0}, 
''nls_sort=danish'')"),
+
+    // Norwegian:
+    //   Alphabet consists of A-Z followed by AE, O-stroke, and A-ring.
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    NORWEGIAN(new Locale("no"), "0", false, false,
+            "nlssort({0}, ''nls_sort=norwegian'')"),
+
+    // Swedish:
+    //   Alphabet consists of A-Z followed by A-ring, A-diaeresis, and 
O-diaeresis.
+    //   Using oracle's nlssort() function to sort; digits come before letters,
+    //   nothing sorts after the last legal swedish character.
+    SWEDISH(new Locale("sv"), null, false, false,
+            "nlssort({0}, ''nls_sort=swedish'')"),
+
+    // Finnish:
+    //   Alphabet consists of A-Z, minus W, followed by A-ring, A-diaeresis, 
and O-diaeresis.
+    //   We leave out W so that V's show up properly (bug #151961/W-513969)
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    FINNISH(new Locale("fi"),
+            new String[] {
+                "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", 
"M", "N", "O", "P",
+                "Q", "R", "S", "T", "U", "V", "X", "Y", "Z", "\u00C5", 
"\u00C4", "\u00D6" },
+            "0", false, false, "nlssort({0}, ''nls_sort=finnish'')"),
+
+    // Czech:
+    //   Alphabet consists of many Czech letters but not all english letters.
+    //   Using oracle's nlssort() function to sort; digits come right after 
letters.
+    CZECH(new Locale("cs"), "0", true, false,
+            "nlssort({0}, ''nls_sort=xczech'')"),
+
+    // Polish:
+    //   Alphabet consists of many Polish letters but not all english letters.
+    //   Using oracle's nlssort() function to sort.
+    POLISH(new Locale("pl"), "\u00DF", false, false,
+            "nlssort({0}, ''nls_sort=polish'')"),
+
+    // Turkish:
+    //   Use Turkish alphabet, which also indicates special handling in 
getUpperCaseValue().
+    //   Using oracle's nlssort() function to sort.
+    TURKISH(new Locale("tr"), LinguisticSort.Alphabets.TURKISH, null, false, 
false,
+            "nlssort({0}, ''nls_sort=xturkish'')"),
+
+    // Traditional chinese:
+    //   Use English alphabet. Using oracle's nlssort() function to sort by 
stroke.
+    CHINESE_HK(new Locale("zh", "HK"), LinguisticSort.Alphabets.ENGLISH, 
"\u03B1", true, true,
+            "nlssort({0}, ''nls_sort=tchinese_radical_m'')"),
+    CHINESE_HK_STROKE(new Locale("zh", "HK", "STROKE"), 
LinguisticSort.Alphabets.ENGLISH, "\u03B1",
+            true, true, "nlssort({0}, ''nls_sort=tchinese_stroke_m'')"),
+
+    CHINESE_TW(new Locale("zh", "TW"), LinguisticSort.Alphabets.ENGLISH, 
"\u03B1", true, true,
+            "nlssort({0}, ''nls_sort=tchinese_radical_m'')"),
+    CHINESE_TW_STROKE(new Locale("zh", "TW", "STROKE"), 
LinguisticSort.Alphabets.ENGLISH, "\u03B1",
+            true, true, "nlssort({0}, ''nls_sort=tchinese_stroke_m'')"),
+
+
+    // Simplified chinese:
+    //   Use English alphabet. Using oracle's nlssort() function to sort by 
pinyin.
+    CHINESE(new Locale("zh"), LinguisticSort.Alphabets.ENGLISH, "\u03B1", 
true, true,
+            "nlssort({0}, ''nls_sort=schinese_radical_m'')"),
+    CHINESE_STROKE(new Locale("zh", "", "STROKE"), 
LinguisticSort.Alphabets.ENGLISH, "\u03B1",
+            true, true,
+            "nlssort({0}, ''nls_sort=schinese_stroke_m'')"),
+    CHINESE_PINYIN(new Locale("zh", "", "PINYIN"), 
LinguisticSort.Alphabets.ENGLISH, "\u03B1",
+            true, true,
+            "nlssort({0}, ''nls_sort=schinese_pinyin_m'')"),
+
+
+    // Japanese:
+    //   Japanese alphabet. Using oracle's nlssort() function to sort. Special 
rolodex handling
+    JAPANESE(new Locale("ja"), LinguisticSort.Alphabets.JAPANESE, null, true, 
true,
+            "nlssort({0}, ''nls_sort=japanese_m'')"),
+
+    // Korean:
+    //   Use English alphabet. Using oracle's nlssort() function to sort.
+    KOREAN(new Locale("ko"), LinguisticSort.Alphabets.ENGLISH, "\u03B1", true, 
true,
+            "nlssort({0}, ''nls_sort=korean_m'')"),
+
+    // Russian:
+    //   Using oracle's nlssort() function to sort.
+    RUSSIAN(new Locale("ru"), null, false, false,
+            "nlssort({0}, ''nls_sort=russian'')"),
+
+    // Bulgarian:
+    //   Using oracle's nlssort() function to sort.
+    BULGARIAN(new Locale("bg"), LinguisticSort.Alphabets.BULGARIAN, null, 
true, false,
+            "nlssort({0}, ''nls_sort=bulgarian'')"),
+
+    // Indonesian
+    //   Using oracle's nlssort() function to sort.
+    INDONESIAN(new Locale("in"), null, true, false, "nlssort({0}, 
''nls_sort=indonesian'')"),
+
+    // Romanian:
+    //   Using oracle's nlssort() function to sort.
+    ROMANIAN(new Locale("ro"),
+             new String[] { "A", "\u0102", "\u00c2", "B", "C", "D", "E", "F", 
"G", "H", "I",
+                 "\u00ce", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", 
"\u015e", "T",
+                 "\u0162", "U", "V", "W", "X", "Y", "Z" },
+            null, true, false, "nlssort({0}, ''nls_sort=romanian'')"),
+
+    // Vietnamese
+    //   Using oracle's nlssort() function to sort.
+    VIETNAMESE(new Locale("vi"),
+               new String[] {
+                   "A", "\u0102", "\u00c2", "B", "C", "D", "\u0110", "E", 
"\u00ca", "G", "H",
+                   "I", "K", "L", "M", "N", "O", "\u00d4", "\u01a0", "P", "Q", 
"R", "S", "T",
+                   "U", "\u01af", "V", "X", "Y" },
+            null, false, false, "nlssort({0}, ''nls_sort=vietnamese'')"),
+
+    // Ukrainian:
+    //   Using oracle's nlssort() function to sort.
+    UKRAINIAN(new Locale("uk"), null, false, false, "nlssort({0}, 
''nls_sort=ukrainian'')"),
+
+    // Hungarian:
+    //   Using oracle's nlssort() function to sort.
+    HUNGARIAN(new Locale("hu"), LinguisticSort.Alphabets.HUNGARIAN, null, 
false, false,
+            "nlssort({0}, ''nls_sort=xhungarian'')"),
+
+    // Greek:
+    //   Using oracle's nlssort() function to sort.
+    GREEK(new Locale("el"), null, false, false, "nlssort({0}, 
''nls_sort=greek'')"),
+
+    // Hebrew:
+    //   Using oracle's nlssort() function to sort.
+    HEBREW(new Locale("iw"), null, true, false, "nlssort({0}, 
''nls_sort=hebrew'')"),
+
+    // Slovak:
+    //   Using oracle's nlssort() function to sort.
+    SLOVAK(new Locale("sk"), LinguisticSort.Alphabets.SLOVAK, null, true, 
false,
+            "nlssort({0}, ''nls_sort=slovak'')"),
+
+    // Serbian (cyrillic):
+    //   Using oracle's nlssort() function to sort using its default
+    SERBIAN_CYRILLIC(new Locale("sr"), null, false, false,
+            "nlssort({0}, ''nls_sort=generic_m'')"),
+
+    // Serbian (latin):
+    //   Using oracle's nlssort() function to sort using its default
+    SERBIAN_LATIN(new Locale("sh"), LinguisticSort.Alphabets.SERBIAN_LATIN, 
null, false, false,
+            "nlssort({0}, ''nls_sort=xcroatian'')"),
+
+    // Bosnian:
+    //   Using oracle's nlssort() function to sort using its default
+    BOSNIAN(new Locale("bs"), LinguisticSort.Alphabets.SERBIAN_LATIN, null, 
false, false,
+            "nlssort({0}, ''nls_sort=xcroatian'')"),
+
+
+    // Georgian:
+    //   Using oracle's nlssort() function to sort, even though we're using 
binary for this.
+    GEORGIAN(new Locale("ka"), LinguisticSort.Alphabets.GEORGIAN, null, false, 
false,
+            "nlssort({0}, ''nls_sort=binary'')"),
+
+    // BASQUE:
+    //   Using oracle's nlssort() function to sort.
+    BASQUE(new Locale("eu"),  LinguisticSort.Alphabets.BASQUE, null, false, 
false,
+            "nlssort({0}, ''nls_sort=west_european'')"),
+
+    // MALTESE:
+    //   Using oracle's nlssort() function to sort.
+    MALTESE(new Locale("mt"), null, false, false, "nlssort({0}, 
''nls_sort=west_european'')"),
+
+    // ROMANSH:
+    //   Using oracle's nlssort() function to sort.
+    ROMANSH(new Locale("rm"), null, false, false, "nlssort({0}, 
''nls_sort=west_european'')"),
+
+    // LUXEMBOURGISH:
+    //   Using oracle's nlssort() function to sort.
+    LUXEMBOURGISH(new Locale("lb"), LinguisticSort.Alphabets.LUXEMBOURGISH, 
null, false, false,
+            "nlssort({0}, ''nls_sort=west_european'')"),
+
+    // IRISH:
+    //   Using oracle's nlssort() function to sort.
+    IRISH(new Locale("ga"),  null, false, false, "nlssort({0}, 
''nls_sort=west_european'')"),
+
+    // Slovenian:
+    //   Using oracle's nlssort() function to sort.
+    SLOVENE(new Locale("sl"), LinguisticSort.Alphabets.SLOVENE, null, false, 
false,
+            "nlssort({0}, ''nls_sort=xslovenian'')"),
+
+    // Croatian:
+    //   Using oracle's nlssort() function to sort.
+    CROATIAN(new Locale("hr"), LinguisticSort.Alphabets.SERBIAN_LATIN, null, 
false, false,
+            "nlssort({0}, ''nls_sort=xcroatian'')"),
+
+    // Malay
+    //   Using oracle's nlssort() function to sort.
+    //   We're assuming people are using the english alphabet,
+    //   and not the arabic one (Bahasa Melayu)
+    MALAY(new Locale("ms"), null, true, false, "nlssort({0}, 
''nls_sort=malay'')"),
+
+    // Arabic:
+    //   Using oracle's nlssort() function to sort.
+    ARABIC(new Locale("ar"), null, false, false, "nlssort({0}, 
''nls_sort=arabic'')"),
+
+    // Estonian:
+    //   Using oracle's nlssort() function to sort.
+    ESTONIAN(new Locale("et"), LinguisticSort.Alphabets.ESTONIAN, null, true, 
false,
+            "nlssort({0}, ''nls_sort=estonian'')"),
+
+    // Icelandic:
+    //   Using oracle's nlssort() function to sort.
+    ICELANDIC(new Locale("is"), LinguisticSort.Alphabets.ICELANDIC, null, 
true, false,
+            "nlssort({0}, ''nls_sort=icelandic'')"),
+
+    // Latvian:
+    //   Using oracle's nlssort() function to sort.
+    LATVIAN(new Locale("lv"), LinguisticSort.Alphabets.LATVIAN, null, false, 
false,
+            "nlssort({0}, ''nls_sort=latvian'')"),
+
+    // Lithuanian:
+    //   Using oracle's nlssort() function to sort.
+    LITHUANIAN(new Locale("lt"), LinguisticSort.Alphabets.LITHUANIAN, null, 
false, false,
+            "nlssort({0}, ''nls_sort=lithuanian'')"),
+
+
+    // Languages not supported fully.
+    KYRGYZ(new Locale("ky"), LinguisticSort.Alphabets.KYRGYZ, null, true, 
false,
+            "nlssort({0}, ''nls_sort=binary'')"),
+
+    KAZAKH(new Locale("kk"), LinguisticSort.Alphabets.KAZAKH, null, true, 
false,
+            "nlssort({0}, ''nls_sort=binary'')"),
+
+    TAJIK(new Locale("tg"), LinguisticSort.Alphabets.TAJIK, null, true, false,
+            "nlssort({0}, ''nls_sort=russian'')"),
+
+    BELARUSIAN(new Locale("be"), null, true, false, "nlssort({0}, 
''nls_sort=russian'')"),
+
+    TURKMEN(new Locale("tk"), LinguisticSort.Alphabets.TURKISH, null, false, 
false,
+            "nlssort({0}, ''nls_sort=xturkish'')"),
+
+    AZERBAIJANI(new Locale("az"), LinguisticSort.Alphabets.AZERBAIJANI, null, 
false, false,
+            "nlssort({0}, ''nls_sort=xturkish'')"),
+
+    ARMENIAN(new Locale("hy"), null, true, false, "nlssort({0}, 
''nls_sort=binary'')"),
+
+    THAI(new Locale("th"), null, true, false, "nlssort({0}, 
''nls_sort=thai_dictionary'')"),
+
+    // Binary?  really
+    HINDI(new Locale("hi"), null, true, false, "nlssort({0}, 
''nls_sort=binary'')"),
+
+    URDU(new Locale("ur"), LinguisticSort.Alphabets.URDU, null, false, false,
+            "nlssort({0}, ''nls_sort=arabic'')"),
+
+    // Bengali
+    BENGALI(new Locale("bn"), LinguisticSort.Alphabets.BENGALI, null, true, 
false,
+            "nlssort({0}, ''nls_sort=bengali'')"),
+
+    TAMIL(new Locale("ta"), LinguisticSort.Alphabets.TAMIL, null, true, false,
+            "nlssort({0}, ''nls_sort=binary'')"),
+
+    // Unused language for testing; Alphabet and sorting defaults to English
+    ESPERANTO(new Locale("eo"), LinguisticSort.Alphabets.ENGLISH, "[", false, 
false,
+            LinguisticSort.Alphabets.STRING);
+
+    private static final Map<Locale, LinguisticSort> BY_LOCALE = 
getByLocaleInfo();
+
+    /**
+     * Builds the locale-to-sort lookup table that backs BY_LOCALE.  The map is fully
+     * populated first and only then wrapped by unmodifiableMap (which stores it in a final
+     * field in its constructor), in order to get a proper guarantee from Java's memory model.
+     *
+     * See http://jeremymanson.blogspot.com/2008/07/immutability-in-java-part-2.html
+     *
+     * @return an unmodifiable map from each constant's locale to that constant
+     */
+    private static Map<Locale, LinguisticSort> getByLocaleInfo() {
+        final Map<Locale, LinguisticSort> sortsByLocale = new HashMap<>(64);
+        for (LinguisticSort candidate : values()) {
+            final LinguisticSort previous = sortsByLocale.put(candidate.getLocale(), candidate);
+            // Every constant must have its own locale; a clash means the enum is misdeclared.
+            assert previous == null
+                    : "Two linguistic sorts with the same locale: " + candidate.getLocale();
+        }
+        return Collections.unmodifiableMap(sortsByLocale);
+    }
+
+    /**
+     * Get sorting info for the given locale.
+     *
+     * When IS_MULTI_LINGUAL is true the lookup tries, in order: the exact locale; for a
+     * locale with a variant, a less specific locale (for "zh" with a country, the language
+     * plus variant, otherwise the language plus country); and finally the bare language.
+     * ENGLISH is returned when nothing matches or when IS_MULTI_LINGUAL is false.
+     *
+     * @param locale the locale to look up; a null locale is not handled (it is dereferenced
+     *        when IS_MULTI_LINGUAL is true)
+     * @return the sort for the locale, or ENGLISH as the fallback
+     */
+    public static LinguisticSort get(Locale locale) {
+        // For non-UTF8 dbs, we always interpret everything as English.  (We did not set
+        // the page encoding to UTF-8, and thus we may have incorrectly encoded data.)
+        // On all other instances, look for the language of the user's locale.  This should
+        // succeed because every language we support is listed in data.  But just in case,
+        // default to english also.
+        if (IS_MULTI_LINGUAL /*|| TestContext.isRunningTests()*/) {
+            LinguisticSort sort = BY_LOCALE.get(locale);
+            if (sort != null) {
+                return sort;
+            }
+            if (locale.getVariant().length() > 0) {
+                if ("zh".equals(locale.getLanguage())) {
+                    // TW and HK are handled above, this handles SG
+                    // The country (not the language, which is always "zh" here) must be
+                    // checked: without a country the retry below would use an equal locale
+                    // and recurse forever.
+                    if (!"".equals(locale.getCountry())) {
+                        // This means it's standard.
+                        return get(new Locale(locale.getLanguage(), "", locale.getVariant()));
+                    }
+                }
+                // Unknown variant: retry with the variant dropped.
+                return get(new Locale(locale.getLanguage(), locale.getCountry()));
+            }
+            if (locale.getCountry().length() > 0) {
+                sort = BY_LOCALE.get(new Locale(locale.getLanguage()));
+                if (sort != null) {
+                    return sort;
+                }
+            }
+        }
+        return ENGLISH;
+    }
+
+    /**
+     * The locale for this LinguisticSort instance.
+     */
+    private final Locale locale;
+
+    /**
+     * Collator for this LinguisticSort instance.  This may be different than the
+     * default collator for its locale.  This is to better match Oracle's nls sort
+     * ordering.
+     */
+    private final Collator collator;
+
+    /**
+     * Array of letters (Strings) to show in the rolodex.  An empty array for
+     * alphabet means that the rolodex is not supported for the locale.
+     */
+    private final String[] alphabet;
+
+    /**
+     * An optional String that sorts higher than all letters in the alphabet.
+     * Used when generating the rolodex sql for the last letter.
+     * When present it must be exactly one character long (asserted in the constructor).
+     */
+    private final String highValue;
+
+    /**
+     * True if normal secondary sorting is reversed, i.e., if lower case letters
+     * are sorted before upper case.
+     */
+    private final boolean reverseSecondary;
+
+    /**
+     * True if the locale has double width alphabet, numbers or symbols,
+     * so we use Oracle's to_single_byte to convert into the half width letter.
+     */
+    private final boolean hasDoubleWidth;
+
+    /**
+     * A MessageFormat pattern for generating an oracle sql expression returning the
+     * collation key for sorting a sql expression.  Not used by postgres.
+     */
+    private final String collationKeySql;
+
+    /**
+     * For upper-casing Java values and generating SQL to generate the same.
+     * Not used by postgres.
+     */
+    private final OracleUpperTable upper;
+
+    /**
+     * Static-data constructor for locales whose rolodex alphabet is derived from ICU:
+     * the alphabet comes from getAlphabetFromICU(locale) and every other argument is
+     * handed unchanged to the full constructor.
+     */
+    LinguisticSort(Locale locale, String highValue, boolean reverseSecondary,
+                   boolean hasDoubleWidth, String collationKeySql) {
+        this(locale,
+             getAlphabetFromICU(locale),
+             highValue,
+             reverseSecondary,
+             hasDoubleWidth,
+             collationKeySql);
+    }
+
+    /**
+     * Mapping for locales and ULocale language tags to use for constructing 
an ICU4J collator.
+     * javac complains if we attempt to refer to a static defined inside the 
same class as an enum,
+     * so we need to use an inner class to have such a constant mapping.
+     */
+    private static final class Icu4jCollatorOverrides {
+        static final Map<Locale, String> OVERRIDES = 
getIcu4jCollatorOverrides();
+
+        /**
+         * ICU4J collator overrides
+         */
+        private static Map<Locale, String> getIcu4jCollatorOverrides() {
+            // Map between a Locale and a BCP47 language tag to use when 
calling ICU4J's
+            // Collator.getInstance(ULocale.forLanguageTag()).
+            Map<Locale, String> overrides = new HashMap<Locale, String>(7);
+
+            // Built-in JDK collators for Chinese are behind the Unicode 
standard, so we need to
+            // override them. See discussion at
+            // https://stackoverflow.com/questions/33672422
+            //   /wrong-sorting-with-collator-using-locale-simplified-chinese
+            // Also see the following JDK collator bugs:
+            // https://bugs.openjdk.java.net/browse/JDK-6415666
+            // https://bugs.openjdk.java.net/browse/JDK-2143916
+            // https://bugs.openjdk.java.net/browse/JDK-6411864
+
+            // CHINESE_HK:
+            overrides.put(new Locale("zh", "HK"), "zh-HK-u-co-unihan");
+            // CHINESE_HK_STROKE:
+            overrides.put(new Locale("zh", "HK", "STROKE"), 
"zh-HK-u-co-stroke");
+            // CHINESE_TW:
+            overrides.put(new Locale("zh", "TW"), "zh-TW-u-co-unihan");
+            // CHINESE_TW_STROKE:
+            overrides.put(new Locale("zh", "TW", "STROKE"), 
"zh-TW-u-co-stroke");
+            // CHINESE:
+            overrides.put(new Locale("zh"), "zh-CN-u-co-unihan");
+            // CHINESE_STROKE:
+            overrides.put(new Locale("zh", "", "STROKE"), "zh-CN-u-co-stroke");
+            // CHINESE_PINYIN:
+            overrides.put(new Locale("zh", "", "PINYIN"), "zh-CN-u-co-pinyin");
+
+            return Collections.unmodifiableMap(overrides);
+        }
+    }
+
+    /**
+     * Constructor only used when building static data
+     */
+    LinguisticSort(Locale locale, String[] alphabet, String highValue, boolean 
reverseSecondary,
+                   boolean hasDoubleWidth, String collationKeySql) {
+        this.locale = locale;
+        this.alphabet = alphabet;
+        this.highValue = highValue;
+        assert this.highValue == null || this.highValue.length() == 1;
+        this.reverseSecondary = reverseSecondary;
+        this.hasDoubleWidth = hasDoubleWidth;
+        this.collationKeySql = collationKeySql;
+        // Construct collator for this locale
+        if 
(LinguisticSort.Icu4jCollatorOverrides.OVERRIDES.containsKey(this.locale)) {
+            // Force ICU4J collators for specific locales so they match Oracle 
sort
+            this.collator = 
CollatorICU.wrap(com.ibm.icu.text.Collator.getInstance(
+                    ULocale.forLanguageTag(LinguisticSort
+                            
.Icu4jCollatorOverrides.OVERRIDES.get(this.locale))));
+        } else if (this.locale.getVariant().length() > 0) {
+            // If there's a variant, use ICU4J to figure it out.
+            this.collator = 
CollatorICU.wrap(com.ibm.icu.text.Collator.getInstance(
+                    ULocale.forLocale(this.locale)));
+        } else {
+            this.collator = Collator.getInstance(this.locale);
+        }
+        this.collator.setStrength(Collator.SECONDARY);
+        this.upper = OracleUpperTable.forLinguisticSort(name());
+    }
+
+    /**
+     * Hands out a private copy of this instance's collator.
+     *
+     * @return a new collator for this LinguisticSort instance.
+     */
+    public Collator getCollator() {
+        // RuleBasedCollator.compare() is synchronized, so sharing this.collator would make
+        // requests for the same language wait for each other.  A clone avoids that, and
+        // cloning a RuleBasedCollator is much more efficient than creating one from the
+        // rules.
+        final Object copy = this.collator.clone();
+        return (Collator) copy;
+    }
+
+    /**
+     * Returns a fresh collator that is guaranteed to be case-insensitive.
+     *
+     * <p>Danish collation, unfortunately, is a little odd, in that "v" and "w" are
+     * considered to be the same character.  To make up for this, "v" and "V" were made a
+     * secondary difference, which makes Enum comparisons in FilterItem a little wonky.
+     * For the Danish language the strength is therefore lowered to PRIMARY.
+     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4174436
+     *
+     * @return a new collator for this LinguisticSort instance
+     */
+    public Collator getGuaranteedCaseInsensitiveCollator() {
+        final Collator result = getCollator();
+        final boolean danish = "da".equals(this.locale.getLanguage());
+        if (danish) {
+            result.setStrength(Collator.PRIMARY);
+        }
+        return result;
+    }
+
+    /**
+     * @return the locale this LinguisticSort instance was created for.
+     */
+    Locale getLocale() {
+        return locale;
+    }
+
+    /**
+     * Returns a clone of this instance's collator viewed as a string comparator.
+     *
+     * @return a new comparator for strings for this LinguisticSort instance.
+     */
+    @SuppressWarnings("unchecked")
+    public Comparator<String> getNonCachingComparator() {
+        // Converting from Comparator<Object> to Comparator<String>
+        final Object copy = this.collator.clone();
+        return (Comparator<String>) copy;
+    }
+
+    /**
+     * Returns a collation-key caching comparator built on a private copy of the collator.
+     *
+     * @param size the number of elements to compare (default is 16); used as the initial
+     *             capacity of the comparator's key cache.
+     * @return a new comparator for strings for this LinguisticSort instance.
+     */
+    public Comparator<String> getComparator(int size) {
+        final Collator privateCollator = getCollator();
+        return new LinguisticSort.CollatingComparator(privateCollator, size);
+    }
+
+    /**
+     * A String comparator that uses the given collation.  The collation key of every
+     * distinct string is computed once and kept in a HashMap, so repeated comparisons of
+     * the same strings reuse the key.  A null string sorts before any non-null string.
+     */
+    static class CollatingComparator implements Comparator<String> {
+        private final Collator collator;
+        private final Map<String, CollationKey> cKeyMap;
+
+        /** Creates a comparator whose key cache starts with a capacity of 16. */
+        CollatingComparator(Collator collator) {
+            this(collator, 16);
+        }
+
+        /**
+         * @param collator    the collator that produces the collation keys
+         * @param defaultSize initial capacity of the key cache
+         */
+        CollatingComparator(Collator collator, int defaultSize) {
+            this.cKeyMap = new HashMap<>(defaultSize);
+            this.collator = collator;
+        }
+
+        @SuppressWarnings(
+                value = "ES_COMPARING_PARAMETER_STRING_WITH_EQ",
+                justification = "Reference comparison used for performance improvement.")
+        public int compare(String o1, String o2) {
+            // Same reference (including both null): equal without touching the collator.
+            if (o1 == o2) {
+                return 0;
+            }
+            if (o2 == null) {
+                return 1;
+            }
+            if (o1 == null) {
+                return -1;
+            }
+            final CollationKey left = getCollationKey(o1);
+            final CollationKey right = getCollationKey(o2);
+            return left.compareTo(right);
+        }
+
+        /** Returns the cached collation key for comp, computing and caching it if absent. */
+        private CollationKey getCollationKey(String comp) {
+            final CollationKey cached = cKeyMap.get(comp);
+            if (cached != null) {
+                return cached;
+            }
+            final CollationKey fresh = collator.getCollationKey(comp);
+            cKeyMap.put(comp, fresh);
+            return fresh;
+        }
+    }
+
+    /**
+     * Returns the number of letters to show in the rolodex; 0 when the alphabet is empty.
+     */
+    public int getAlphabetLength() {
+        final String[] letters = this.alphabet;
+        return letters.length;
+    }
+
+    /**
+     * Returns the n-th letter in the rolodex.  Note that a 'letter' in a language may be
+     * composed of more than one unicode character, for example, the letter 'ch' in Czech.
+     *
+     * @param index zero-based position in the alphabet
+     */
+    public String getAlphabet(int index) {
+        final String[] letters = this.alphabet;
+        return letters[index];
+    }
+
+    /**
+     * Used only for test code.  Returns the internal alphabet array itself, not a copy.
+     */
+    String[] getAlphabet() {
+        return alphabet;
+    }
+
+    /**
+     * Return the rolodexIndex for a string.
+     *
+     * <p>The alphabet is walked in order and the search term is compared to each letter
+     * with this instance's collator.  An exact match yields that letter's index; the first
+     * letter that sorts after the term yields the previous letter's index.
+     *
+     * @param searchTerm  Must be a 1-char string
+     * @return the rolodexIndex, including Other (i.e. getAlphabetLength) if it doesn't
+     *         fall into a bucket. If this language doesn't have a rolodex (its alphabet is
+     *         empty) return -1
+     * @throws IllegalArgumentException if the string is null or not of length 1
+     */
+    public int getRolodexIndexForChar(String searchTerm) {
+        if (searchTerm == null || searchTerm.length() != 1) {
+            throw new IllegalArgumentException("Must be a one-length string");
+        }
+
+        final int otherBucket = this.getAlphabetLength();
+        if (otherBucket == 0) {
+            return -1;
+        }
+
+        int idx = 0;
+        while (idx < otherBucket) {
+            final int order = this.collator.compare(searchTerm, this.getAlphabet(idx));
+            if (order == 0) {
+                return idx;
+            }
+            if (order < 0) {
+                // Less than the very first letter: Other.  Otherwise it is less than the
+                // current letter but was not equal to the previous one, so it belongs to
+                // the previous rolodex letter.
+                return idx == 0 ? otherBucket : (idx - 1);
+            }
+            idx++;
+        }
+        // Sorts after every letter in the alphabet.
+        return otherBucket;
+    }
+
+    /**
+     * Returns the sql expression to convert the given sql expression to upper case.
+     *
+     * @param expr       the sql expression to wrap
+     * @param isPostgres true to emit an icu_upper() call with this locale; false to use the
+     *                   Oracle upper-casing table
+     */
+    public String getUpperCaseSql(String expr, boolean isPostgres) {
+        if (!isPostgres) {
+            return upper.getSql(expr);
+        }
+        return "icu_upper(" + expr + ",'" + this.locale.toString() + "')";
+    }
+
+    /**
+     * @return true if sql UPPER() is used in getUpperCaseSql().  Note that this is always
+     *         false for postgres because postgres always uses the icu_upper() function for
+     *         all languages.
+     */
+    public boolean usesUpperToGetUpperCase(boolean isPostgres) {
+        if (isPostgres) {
+            return false;
+        }
+        final String generated = upper.getSql("x");
+        return "upper(x)".equals(generated);
+    }
+
+    /**
+     * Returns the upper case value of the given value, or what would be the result
+     * of applying the sql expression in getUpperCaseSql() to the given value.
+     * For locales flagged as having double width characters the value is first converted
+     * with toSingleWidth.
+     */
+    public String getUpperCaseValue(String value, boolean isPostgres) {
+        final String normalized = this.hasDoubleWidth ? toSingleWidth(value) : value;
+        return isPostgres
+                ? normalized.toUpperCase(this.locale)
+                : upper.toUpperCase(normalized);
+    }
+
+    /**
+     * Two-level lookup table used by toSingleWidth(char): the first index is the high byte
+     * of the character, the second its low byte.  A null row or a zero entry means "no
+     * translation".
+     */
+    private static final char[][] DOUBLE_TO_SINGLE = new char[256][];
+    static {
+        // U+2018, U+2019, U+201D
+        final char[] row20 = new char[256];
+        row20[0x18] = '`';
+        row20[0x19] = '\'';
+        row20[0x1D] = '"';
+        DOUBLE_TO_SINGLE[0x20] = row20;
+
+        // U+223C
+        final char[] row22 = new char[256];
+        row22[0x3C] = '~';
+        DOUBLE_TO_SINGLE[0x22] = row22;
+
+        // U+3000
+        final char[] row30 = new char[256];
+        row30[0x00] = ' ';
+        DOUBLE_TO_SINGLE[0x30] = row30;
+
+        // U+FE3F
+        final char[] rowFE = new char[256];
+        rowFE[0x3F] = '^';
+        DOUBLE_TO_SINGLE[0xFE] = rowFE;
+
+        // U+FF01..U+FF5D map to the ASCII character whose code is the low byte + 0x20
+        // ('!' .. '}'), except for four slots that are deliberately left untranslated:
+        // U+FF02, U+FF07, U+FF3E and U+FF40.
+        final char[] rowFF = new char[256];
+        for (int low = 0x01; low <= 0x5D; low++) {
+            if (low == 0x02 || low == 0x07 || low == 0x3E || low == 0x40) {
+                continue;
+            }
+            rowFF[low] = (char) (low + 0x20);
+        }
+        DOUBLE_TO_SINGLE[0xFF] = rowFF;
+    }
+
+    /**
+     * Converts one double width character to its single width counterpart.
+     *
+     * @param c the character to convert
+     * @return the single width character from DOUBLE_TO_SINGLE, or c itself when the table
+     *         holds no translation for it
+     */
+    public static char toSingleWidth(char c) {
+        // The high byte selects the row; a missing row means no translation.
+        final char[] row = DOUBLE_TO_SINGLE[c >> 8];
+        if (row != null) {
+            // The low byte selects the entry; zero means no translation.
+            final char mapped = row[c & 0x00ff];
+            if (mapped != 0) {
+                return mapped;
+            }
+        }
+        return c;
+    }
+
+    /**
+     * Convert double width ascii characters to single width, one character at a time.
+     * This is the equivalent of Oracle's to_single_byte().
+     *
+     * @param value the string to convert
+     * @return the converted string
+     */
+    public static String toSingleWidth(String value) {
+        final int len = value.length();
+        final DeferredStringBuilder out = new DeferredStringBuilder(value);
+        int pos = 0;
+        while (pos < len) {
+            out.append(toSingleWidth(value.charAt(pos)));
+            pos++;
+        }
+        return out.toString();
+    }
+
+    /**
+     * Returns the sql expression to compute the linguistic sort collation key for the
+     * given sql expression.  This supports sorting in the database, where sort order
+     * of different upper and lower cases are handled linguistically.
+     *
+     * @param expr       the sql expression to wrap
+     * @param isPostgres true to emit an icu_sortkey() call with this locale; false to fill
+     *                   expr into this instance's collationKeySql MessageFormat pattern
+     */
+    public String getCollationKeySql(String expr, boolean isPostgres) {
+        if (!isPostgres) {
+            final Object[] arguments = new Object[] { expr };
+            return MessageFormat.format(this.collationKeySql, arguments);
+        }
+        return "icu_sortkey(" + expr + ",'" + this.locale.toString() + "')::text";
+    }
+
+    /**
+     * Returns the sql expression to compute the linguistic sort collation key for the
+     * upper case of given sql expression.  This supports case-insensitive filtering
+     * in the database.
+     *
+     * <p>When, outside postgres, the upper-casing sql is already identical to the collation
+     * key pattern, the expression is wrapped once rather than twice.
+     */
+    public String getUpperCollationKeySql(String expr, boolean isPostgres) {
+        if (!isPostgres) {
+            final String upperPattern = String.format(upper.getSqlFormatString(), "{0}");
+            if (upperPattern.equals(this.collationKeySql)) {
+                return getCollationKeySql(expr, false);
+            }
+        }
+        final String upperCased = getUpperCaseSql(expr, isPostgres);
+        return getCollationKeySql(upperCased, isPostgres);
+    }
+
+    private String formatLetter(String letter, boolean isPostgres) {
+        return getCollationKeySql('\'' + letter + '\'', isPostgres);
+    }
+
+    //
+    // Private Data
+    //
+
+    // TODO: Make this an environment variable.
+    // Currently hard-wired to true, so get(Locale) always takes its locale-lookup path.
+    // The expression kept in the block comment below appears to be the earlier,
+    // environment-based condition; it is not evaluated.
+    private static final boolean IS_MULTI_LINGUAL = true; 
/*(SfdcEnvProvider.getEnv() == null ||
+            SfdcEnvProvider.getEnv().getIniFile().getString("Pages", 
"encoding").length() > 0);*/
+
+    static String[] getAlphabetFromICU(Locale locale) {
+        AlphabeticIndex<?> index = new AlphabeticIndex<String>(locale);
+        List<String> alphabet = index.getBucketLabels();
+        if (alphabet.size() > 6) {
+            // Strip off first and last (which are ...)
+            List<String> alphabetWithoutEllipses = alphabet.subList(1, 
alphabet.size() - 1);
+            return alphabetWithoutEllipses.toArray(new 
String[alphabetWithoutEllipses.size()]);
+        } else {
+            return new String[0];
+        }
+    }
+
+    /**
+     * You can't refer to a static defined inside the same class as an enum, 
so you need an
+     * inner class to have such constants
+     * These are the alphabets that cannot be auto-derived from ICU's CLDR 
information
+     */
+    static final class Alphabets {
+        static final String[] ENGLISH = { "A", "B", "C", "D", "E", "F", "G", 
"H", "I", "J", "K",
+            "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", 
"Y", "Z" };
+        static final String[] CATALAN = { "A", "B", "C", "\u00C7", "D", "E", 
"F", "G", "H", "I",
+            "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", 
"W", "X", "Y", "Z" };
+        static final String[] BASQUE = { "A", "B", "C", "\u00C7", "D", "E", 
"F", "G", "H", "I",
+            "J", "K", "L", "M", "N", "\u00D1", "O", "P", "Q", "R", "S", "T", 
"U", "V", "W", "X",
+            "Y", "Z" };
+        static final String[] JAPANESE = { "A", "B", "C", "D", "E", "F", "G", 
"H", "I", "J", "K",
+            "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", 
"Y", "Z", "\u30A2",
+            "\u30AB", "\u30B5", "\u30BF", "\u30CA", "\u30CF", "\u30DE", 
"\u30E4", "\u30E9",
+            "\u30EF" };
+
+        // A, B, C, Cs, D, E, F, G, Gy, H, I, J, K, L, Ly, M, N, Ny, O, Ö, P, 
Q, R, S, Sz, T,
+        // Ty, U, Ü, V, W, X, Y, Z, Zs
+        static final String[] HUNGARIAN = { "A", "B", "C", "Cs", "D", "E", 
"F", "G", "Gy", "H",
+            "I", "J", "K", "L", "Ly", "M", "N", "Ny", "O", "\u00d6", "P", "Q", 
"R", "S", "Sz",
+            "T", "Ty", "U", "\u00dc", "V", "W", "X", "Y", "Z", "Zs" };
+
+        static final String[] TURKISH = { "A", "B", "C", "\u00C7", "D", "E", 
"F", "G", "\u011E",
+            "H", "I", "\u0130", "J", "K", "L", "M", "N", "O", "\u00D6", "P", 
"R", "S", "\u015E",
+            "T", "U", "\u00DC", "V", "Y", "Z" };
+
+        // A, B, C, Ç, D, E, Ə, F, G, Ğ, H, X, I, İ, J, K, Q, L, M, N, O, Ö, 
P, R, S, Ş, T,
+        // U, Ü, V, Y, Z
+        static final String[] AZERBAIJANI = { "A", "B", "C", "\u00C7", "D", 
"E", "\u018F", "F",
+            "G", "\u011E", "H", "X", "I", "\u0130", "J", "K", "Q", "L", "M", 
"N", "O", "\u00D6",
+            "P", "R", "S", "\u015E", "T", "U", "\u00DC", "V", "Y", "Z" };
+
+        // Russian without  Ё, Ы, Э
+        static final String[] BULGARIAN = { "\u0410", "\u0411", "\u0412", 
"\u0413", "\u0414",
+            "\u0415", "\u0416", "\u0417", "\u0418", "\u0419", "\u041a", 
"\u041b", "\u041c",
+            "\u041d", "\u041e", "\u041f", "\u0420", "\u0421", "\u0422", 
"\u0423", "\u0424",
+            "\u0425", "\u0426", "\u0427", "\u0428", "\u0429", "\u042a", 
"\u042c", "\u042e",
+            "\u042f" };
+
+        // A B C Č Ć D Đ Dž E F G H I J K L Lj M N Nj O P R S Š T U V Z Ž
+        static final String[] SERBIAN_LATIN = { "A", "B", "C", "\u010c", 
"\u0106", "D", "\u0110",
+            "D\u017e", "E", "F", "G", "H", "I", "J", "K", "L", "Lj", "M", "N", 
"Nj", "O", "P", "R",
+            "S", "\u0160", "T", "U", "V", "Z", "\u017d" };
+
+        // A Á Ä B C Č D Ď DZ DŽ E É F G H CH I Í J K L Ĺ Ľ M N Ň O Ó Ô P Q R 
Ŕ S Š T Ť U Ú V W
+        // X Y Ý Z Ž
+        static final String[] SLOVAK = { "A", "\u00c1", "\u00c4", "B", "C", 
"\u010c", "D",
+            "\u010e", "DZ", "D\u017d", "E", "\u00c9", "F", "G", "H", "CH", 
"I", "\u00cd", "J",
+            "K", "L", "\u0139", "\u013d", "M", "N", "\u0147", "O", "\u00d3", 
"\u00d4", "P", "Q",
+            "R", "\u0154", "S", "\u0160", "T", "\u0164", "U", "\u00da", "V", 
"W", "X", "Y",
+            "\u00dd", "Z", "\u017d" };
+
+        // ა ბ გ დ ე ვ ზ თ ი კ ლ მ ნ ო პ ჟ რ ს ტ უ ფ ქ ღ .ყ შ ჩ ც ძ წ ჭ ხ ჯ ჰ
+        static final String[] GEORGIAN = { "\u10d0", "\u10d1", "\u10d2", 
"\u10d3", "\u10d4",
+            "\u10d5", "\u10d6", "\u10d7", "\u10d8", "\u10d9", "\u10da", 
"\u10db", "\u10dc",
+            "\u10dd", "\u10de", "\u10df", "\u10e0", "\u10e1", "\u10e2", 
"\u10e3", "\u10e4",
+            "\u10e5", "\u10e6", "\u10e7", "\u10e8", "\u10e9", "\u10ea", 
"\u10eb", "\u10ec",
+            "\u10ed", "\u10ee", "\u10ef", "\u10f0" };
+
+        // A B C D E F G H I J K L M N O P Q R S Š Z Ž T U V W Õ Ä Ö Ü X Y
+        static final String[] ESTONIAN = { "A", "B", "C", "D", "E", "F", "G", 
"H", "I", "J", "K",
+            "L", "M", "N", "O", "P", "Q", "R", "S", "\u0160", "Z", "\u017d", 
"T", "U", "V", "W",
+            "\u00d5", "\u00c4", "\u00d6", "\u00dc", "X", "Y" };
+
+        // A Á B D Ð E É F G H I Í J K L M N O Ó P R S T U Ú V X Y Ý Þ Æ Ö
+        static final String[] ICELANDIC = { "A", "\u00c1", "B", "D", "\u00d0", 
"E", "\u00c9", "F",
+            "G", "H", "I", "\u00cd", "J", "K", "L", "M", "N", "O", "\u00d3", 
"P", "R", "S", "T",
+            "U", "\u00da", "V", "X", "Y", "\u00dd", "\u00de", "\u00c6", 
"\u00d6" };
+
+        // A Ā B C Č D E Ē F G Ģ H I Ī J K Ķ L Ļ M N Ņ O P R S Š T U Ū V Z Ž
+        static final String[] LATVIAN = { "A", "\u0100", "B", "C", "\u010c", 
"D", "E", "\u0112",
+            "F", "G", "\u0122", "H", "I", "\u012a", "J", "K", "\u0136", "L", 
"\u013b", "M", "N",
+            "\u0145", "O", "P", "R", "S", "\u0160", "T", "U", "\u016a", "V", 
"Z", "\u017d" };
+
+        // A \u0104 B C \u010c D E \u0118 \u0116 F G H I \u012e Y J K L M N O 
P R S \u0160 T U
+        // \u0172 \u016a V Z \u017d
+        static final String[] LUXEMBOURGISH = { "A", "B", "C", "D", "E", "F", 
"G", "H", "I",
+            "J", "K", "L", "M", "N", "O", "P", "R", "S", "T", "U", "V", "W", 
"X", "Y", "Z",
+            "Ä", "Ë", "É" };
+
+        // Russian with Ң, Ө, Ү
+        static final String[] KYRGYZ =  { "\u0410", "\u0411", "\u0412", 
"\u0413", "\u0414",
+            "\u0415", "\u0401", "\u0416", "\u0417", "\u0418", "\u0419", 
"\u041a", "\u041b",
+            "\u041c", "\u041d", "\u04a2", "\u041e", "\u04e8", "\u041f", 
"\u0420", "\u0421",
+            "\u0422", "\u0423", "\u04ae", "\u0424", "\u0425", "\u0426", 
"\u0427", "\u0428",
+            "\u0429", "\u042a", "\u042b", "\u042c", "\u042d", "\u042e", 
"\u042f" };
+
+        // Kyrgyz with Ә, Ғ, Ұ, Һ, І (ICU4J doesn't have some of these 
characters for sorting...)
+        static final String[] KAZAKH =  { "\u0410", "\u04d8", "\u0411", 
"\u0412", "\u0413",
+            "\u0492", "\u0414", "\u0415", "\u0401", "\u0416", "\u0417", 
"\u0418", "\u0419",
+            "\u041a", "\u049a", "\u041b", "\u041c", "\u041d", "\u04a2", 
"\u041e", "\u04e8",
+            "\u041f", "\u0420", "\u0421", "\u0422", "\u0423", "\u04b0", 
"\u04ae", "\u0424",
+            "\u0425", "\u04ba", "\u0426", "\u0427", "\u0428", "\u0429", 
"\u042a", "\u042b",
+            "\u0406", "\u042c", "\u042d", "\u042e", "\u042f" };
+
+        // Cyrillic Variant
+        static final String[] TAJIK = { "\u0410", "\u0411", "\u0412", 
"\u0413", "\u0492", "\u0414",
+            "\u0415", "\u0401", "\u0416", "\u0417", "\u0418", "\u04e2", 
"\u0419", "\u041a",
+            "\u049a", "\u041b", "\u041c", "\u041d", "\u041e", "\u041f", 
"\u0420", "\u0421",
+            "\u0422", "\u0423", "\u04ee", "\u0424", "\u0425", "\u04b2", 
"\u0427", "\u04b6",
+            "\u0428",  "\u042a", "\u042d", "\u042e", "\u042f" };
+
+        // اآبپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنوەھ۶ىے
+        static final String[] URDU = new String[] {"\u0627", "\u0622", 
"\u0628", "\u067e",
+            "\u062a", "\u0679", "\u062b", "\u062c", "\u0686", "\u062d", 
"\u062e", "\u062f",
+            "\u0688", "\u0630", "\u0631", "\u0691", "\u0632", "\u0698", 
"\u0633", "\u0634",
+            "\u0635", "\u0636", "\u0637", "\u0638", "\u0639", "\u063a", 
"\u0641", "\u0642",
+            "\u06a9", "\u06af", "\u0644", "\u0645", "\u0646", "\u0648", 
"\u06d5", "\u06be",
+            "\u06f6", "\u0649", "\u06d2" };
+
+        // W-1308726: removed Ö and Ü; oracle treats them as the same 
characters as O and U.
+        // A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, ß, T, U, 
V, W, X, Y, Z
+        static final String[] GERMAN = { "A", "B", "C", "D", "E", "F", "G", 
"H", "I", "J", "K",
+            "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", 
"Y", "Z" };
+
+        // 
ক,খ,গ,ঘ,ঙ,চ,ছ,জ,ঝ,ঞ,ট,ঠ,ড,ঢ,ণ,ত,দ,ধ,ন,প,ফ,ব,ভ,ম,য,র,ল,শ,ষ,স,হ,য়,ড়,ঢ,অ,
+        // আ,ই,ঈ,উ,ঊ,ঋ,ৠ,এ,ঐ,ও,ঔ
+        static final String[] BENGALI = { "\u0995", "\u0996", "\u0997", 
"\u0998", "\u0999",
+            "\u099a", "\u099b", "\u099c", "\u099d", "\u099e", "\u099f", 
"\u09a0", "\u09a1",
+            "\u09a2", "\u09a3", "\u09a4", "\u09a6", "\u09a7", "\u09a8", 
"\u09aa", "\u09ab",
+            "\u09ac", "\u09ad", "\u09ae", "\u09af", "\u09b0", "\u09b2", 
"\u09b6", "\u09b7",
+            "\u09b8", "\u09b9", "\u09af\u09bc", "\u09a1\u09bc", "\u09a2", 
"\u0985", "\u0986",
+            "\u0987", "\u0988", "\u0989", "\u098a", "\u098b", "\u09e0", 
"\u098f", "\u0990",
+            "\u0993", "\u0994" };
+
+        // A, Ą, B, C, Č, D, E, Ę, Ė, F, G, H, I, Į, Y, J, K, L, M, N, O, P, 
R, S, Š, T, U, Ų,
+        // Ū, V, Z, Ž
+        static final String[] LITHUANIAN = { "A", "\u0104", "B", "C", 
"\u010c", "D", "E", "\u0118",
+            "\u0116", "F", "G", "H", "I", "\u012e", "Y", "J", "K", "L", "M", 
"N", "O", "P", "R",
+            "S", "\u0160", "T", "U", "\u0172", "\u016a", "V", "Z", "\u017d" };
+
+        // A, B, C, Č, D, E, F, G, H, I, J, K, L, M, N, O, P, R, S, Š, T, U, 
V, Z, Ž
+        static final String[] SLOVENE = { "A", "B", "C", "\u010c", "D", "E", 
"F", "G", "H", "I",
+            "J", "K", "L", "M", "N", "O", "P", "R", "S", "\u0160", "T", "U", 
"V", "Z", "\u017d" };
+
+        // Contains "TAMIL LETTER"s from 
http://www.unicode.org/charts/PDF/U0B80.pdf
+        //அ, ஆ, இ, ஈ, உ, ஊ, எ, ஏ, ஐ, ஒ, ஓ, ஔ, க, ங, ச, ஜ, ஞ,
+        //ட, ண, த, ந, ன, ப, ம, ய, ர, ற, ல, ள, ழ, வ, ஶ, ஷ, ஸ, ஹ
+        static final String[] TAMIL = { "\u0B85", "\u0B86", "\u0B87", 
"\u0B88", "\u0B89", "\u0B8A",
+            "\u0B8E", "\u0B8F", "\u0B90", "\u0B92", "\u0B93", "\u0B94", 
"\u0B95", "\u0B99",
+            "\u0B9A", "\u0B9C", "\u0B9E", "\u0B9F", "\u0BA3", "\u0BA4", 
"\u0BA8", "\u0BA9",
+            "\u0BAA", "\u0BAE", "\u0BAF", "\u0BB0", "\u0BB1", "\u0BB2", 
"\u0BB3", "\u0BB4",
+            "\u0BB5", "\u0BB6", "\u0BB7", "\u0BB8", "\u0BB9" };
+
+        static final String STRING = "upper({0})";
+
+        static final String[] JAPANESE_ROLODEX = {
+            // Notes: unistr('\xxxx') is the Oracle sql expression to get 
unicode
+            // character by code point.
+            // Two backslashes are converted to one backslash by java compiler.
+            /* 'A'  */"unistr('\\3041')",
+            /* 'Ka' */"unistr('\\30F5')",
+            /* 'Sa' */"unistr('\\3055')",
+            /* 'Ta' */"unistr('\\305F')",
+            /* 'Na' */"unistr('\\306A')",
+            /* 'Ha' */"unistr('\\306F')",
+            /* 'Ma' */"unistr('\\307E')",
+            /* 'Ya' */"unistr('\\3084')",
+            /* 'Ra' */"unistr('\\3089')",
+            /* 'Wa' */"unistr('\\308E')", "unistr('\\309D')" };
+
+        // Notes: unistr('\xxxx') is the Oracle sql expression to get unicode 
character
+        // by code point. Two backslashes are converted to one backslash by 
java compiler.
+        static final String[] JAPANESE_ROLODEX_JAVA = {
+            /* 'A'  */"\u3041",
+            /* 'Ka' */"\u30F5",
+            /* 'Sa' */"\u3055",
+            /* 'Ta' */"\u305F",
+            /* 'Na' */"\u306A",
+            /* 'Ha' */"\u306F",
+            /* 'Ma' */"\u307E",
+            /* 'Ya' */"\u3084",
+            /* 'Ra' */"\u3089",
+            /* 'Wa' */"\u308E",
+            "\u3001" // this is the first character after the last valid kana 
in java
+        };
+    }
+
+    /**
+     * Returns a collator whose ordering is consistent with upper-casing both strings and then
+     * comparing them with this sort's collator.
+     * <p>
+     * Apex and possibly other things collate based on upper case versions of strings. Always
+     * upper casing and then comparing is slow, though, so for the English, non-Postgres case
+     * strings made up only of ASCII characters are compared as-is at SECONDARY strength instead
+     * of being upper cased first.
+     *
+     * @param isPostgres forwarded to {@code getUpperCaseValue}; when {@code true} the English
+     *                   fast path is never used (presumably selects Postgres-compatible
+     *                   upper-casing -- confirm against {@code getUpperCaseValue})
+     * @return a new {@link Collator} that delegates to the collator returned by
+     *         {@code getCollator()}
+     */
+    public Collator getUpperCaseCollator(final boolean isPostgres) {
+        final Collator innerCollator = getCollator();
+
+        // so far, the best I've been able to do that doesn't break sort order is to special
+        // case the english locale and scan for non-ascii characters before deciding how to
+        // proceed. With some work the same basic idea would work in many other locales but
+        // it would be very nice to find a more general and faster approach. The challenge
+        // is that upper casing effectively "normalizes" strings in a way that is very hard
+        // to replicate - for instance, western ligatures tend to get expanded by upper casing
+        // but Hangul ones don't. Even when that's all sorted out there's the issue that the
+        // built in collation rules for various locales are fairly narrowly focused. So, for
+        // instance, the English locale doesn't have rules for sorting Greek. With a case
+        // insensitive compare in the English locale, lower case Greek letters sort
+        // differently from upper case Greek letters but the English locale does upper case
+        // Greek letters.
+        //
+        // Locales are compared with equals(): reference equality would silently skip the fast
+        // path for an English locale instance that is not the Locale.ENGLISH constant itself.
+        if (!isPostgres && Locale.ENGLISH.equals(getLocale())) {
+            // NOTE(review): this mutates the collator handed out by getCollator(); assumes
+            // getCollator() returns an instance that is not shared with other callers -- confirm.
+            innerCollator.setStrength(Collator.SECONDARY);
+            return new Collator() {
+                @Override
+                public int compare(String source, String target) {
+                    // upper case only strings where the SECONDARY strength comparison
+                    // (case insensitive comparison) is possibly different for upper
+                    // cased and non upper cased strings
+                    return innerCollator.compare(getUpperCaseIfNeeded(source),
+                            getUpperCaseIfNeeded(target));
+                }
+
+                /**
+                 * Upper cases the string if it contains any non-ascii character, otherwise
+                 * returns it unchanged.
+                 */
+                private String getUpperCaseIfNeeded(String string) {
+                    for (int i = 0; i < string.length(); i++) {
+                        final char ch = string.charAt(i);
+                        if (ch > 127) {
+                            // non-ascii character, bail and use the upper case version
+                            return getUpperCaseValue(string, false);
+                        }
+                    }
+                    // no non-ascii characters found, we don't need to upper case
+                    // - sorting with strength SECONDARY is equivalent.
+                    return string;
+                }
+
+                @Override
+                public CollationKey getCollationKey(String source) {
+                    return innerCollator.getCollationKey(getUpperCaseIfNeeded(source));
+                }
+
+                @Override
+                public int hashCode() {
+                    return LinguisticSort.this.hashCode();
+                }
+
+                @Override
+                public boolean equals(Object that) {
+                    return super.equals(that);
+                }
+            };
+        } else {
+            // General case: always upper case both sides before delegating.
+            return new Collator() {
+                @Override
+                public int compare(String source, String target) {
+                    return innerCollator.compare(getUpperCaseValue(source, isPostgres),
+                            getUpperCaseValue(target, isPostgres));
+                }
+
+                @Override
+                public CollationKey getCollationKey(String source) {
+                    return innerCollator.getCollationKey(getUpperCaseValue(source, isPostgres));
+                }
+
+                @Override
+                public int hashCode() {
+                    return LinguisticSort.this.hashCode();
+                }
+
+                @Override
+                public boolean equals(Object that) {
+                    return super.equals(that);
+                }
+            };
+        }
+    }
+}
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LocaleUtils.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LocaleUtils.java
new file mode 100644
index 0000000000..b07e5b6620
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/LocaleUtils.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.phoenix.thirdparty.com.google.common.base.Splitter;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Lists;
+
+/**
+ * This utility class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * A collection of utilities for dealing with Locales.
+ */
+public enum LocaleUtils {
+    INSTANCE;
+
+    /**
+     * @return the singleton instance
+     */
+    public static LocaleUtils get() {
+        return INSTANCE;
+    }
+
+    // TODO: The number of locales in the system is rather small,
+    //  but we should probably use a ConcurrentLruMap just in case.
+    private static final ConcurrentMap<Locale, Locale> UNIQUE_LOCALE_MAP =
+            new ConcurrentHashMap<>(64, .75f, 2);
+
+    /**
+     * Returns a locale for language-only ("en") or language/country ("en_GB") iso codes.
+     * <ul>
+     * <li>2 characters: the whole string is used as the language.</li>
+     * <li>5 characters: characters 0-1 are the language and characters 3-4 the country; the
+     * separator at index 2 is not inspected.</li>
+     * <li>any other length: the code is split on '_' into language, country and variant, with
+     * missing trailing parts defaulting to the empty string.</li>
+     * </ul>
+     *
+     * @param isoCode the code to convert, may be {@code null}
+     * @return the cached, canonical locale for the code, or {@code null} if {@code isoCode} is
+     *         {@code null}
+     */
+    public Locale getLocaleByIsoCode(String isoCode) {
+        if (isoCode == null) {
+            return null;
+        }
+        if (isoCode.length() == 2) {
+            return uniqueifyLocale(new Locale(isoCode));
+        } else if (isoCode.length() == 5) {
+            String countryIsoCode = isoCode.substring(3, 5);
+            String langIsoCode = isoCode.substring(0, 2);
+            return uniqueifyLocale(new Locale(langIsoCode, countryIsoCode));
+        } else {
+            List<String> split = Lists.newArrayList(Splitter.on('_').split(isoCode));
+            String language = split.get(0);
+            String country = split.size() > 1 ? split.get(1) : "";
+            String variant = split.size() > 2 ? split.get(2) : "";
+            return uniqueifyLocale(new Locale(language, country, variant));
+        }
+    }
+
+    /**
+     * Returns the canonical cached instance equal to {@code value}, caching {@code value} itself
+     * if no equal locale has been seen yet. Code that holds on to a locale should call this so
+     * that all holders share a single instance.
+     *
+     * @param value the locale to uniquify
+     * @return the unique locale, or {@code null} if {@code value} is {@code null}
+     */
+    static Locale uniqueifyLocale(Locale value) {
+        if (value == null) {
+            return null;
+        }
+        // Lock-free fast path for locales that are already cached.
+        Locale cached = UNIQUE_LOCALE_MAP.get(value);
+        if (cached == null) {
+            // putIfAbsent makes the insert atomic: with a plain put() two racing threads could
+            // each cache and return their own instance, defeating the uniqueness guarantee.
+            cached = UNIQUE_LOCALE_MAP.putIfAbsent(value, value);
+            if (cached == null) {
+                cached = value;
+            }
+        }
+        return cached;
+    }
+}
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpper.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpper.java
new file mode 100644
index 0000000000..128990d180
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpper.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * This utility class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * OracleUpper is used in combination with OracleUpperTable to generate upper-case output
+ * consistent with particular chosen Oracle expressions.
+ *
+ * @see OracleUpperTable
+ */
+public class OracleUpper {
+
+    private OracleUpper() {
+        // HideUtilityClassConstructor
+    }
+
+    /**
+     * Upper-case {@code value}, using the information in {@code t} to produce a result
+     * consistent with the PL/SQL expression used to generate t.
+     * <p>
+     * Oracle's upper or nls_upper are known to disagree with Java on some particulars, so every
+     * character listed in {@code t.getUpperCaseExceptions()} is replaced by
+     * {@code t.getUpperCaseExceptionMapping(char)} while the text between such characters goes
+     * through {@link String#toUpperCase(java.util.Locale)} with the table's locale. Characters
+     * outside the language's alphabet must match Oracle too, since records containing them still
+     * have to be returned.
+     *
+     * @param t the table describing where Oracle and Java disagree
+     * @param value the string to upper-case
+     * @return the upper-cased string
+     */
+    public static String toUpperCase(OracleUpperTable t, String value) {
+        final char[] exceptionChars = t.getUpperCaseExceptions();
+        int hit = exceptionChars.length == 0 ? -1 : indexOfException(value, exceptionChars);
+        if (hit < 0) {
+            // Nice case: we know of no reason that Oracle and Java wouldn't agree when
+            // converting to upper case, so we pay for a single relatively fast scan at most.
+            return value.toUpperCase(t.getLocale());
+        }
+
+        // Annoying case: at least one character is handled differently by Oracle. Upper-case
+        // the stretches between exceptional characters with Java and splice in the Oracle
+        // mapping for each exceptional character (two extra String copies: one into the
+        // buffer, one out of it).
+        final StringBuilder out = new StringBuilder(value.length());
+        String tail = value;
+        while (hit >= 0) {
+            out.append(tail.substring(0, hit).toUpperCase(t.getLocale()));
+            out.append(t.getUpperCaseExceptionMapping(tail.charAt(hit)));
+            tail = tail.substring(hit + 1);
+            hit = indexOfException(tail, exceptionChars);
+        }
+        out.append(tail.toUpperCase(t.getLocale()));
+        return out.toString();
+    }
+
+    /**
+     * Finds the first occurrence in {@code text} of any of the (non-empty) exception characters.
+     * String.indexOf is preferred for a single search char; it's faster by virtue of not
+     * requiring two loops and being intrinsic.
+     */
+    private static int indexOfException(String text, char[] exceptionChars) {
+        if (exceptionChars.length == 1) {
+            return text.indexOf(exceptionChars[0]);
+        }
+        return StringUtils.indexOfAny(text, exceptionChars);
+    }
+}
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpperTable.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpperTable.java
new file mode 100644
index 0000000000..b453a1bbd5
--- /dev/null
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/OracleUpperTable.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import java.util.Locale;
+
+import edu.umd.cs.findbugs.annotations.SuppressWarnings;
+
+/**
+ * This utility class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * Generated by i18n.OracleUpperTableGeneratorTest
+ * <p>
+ * An instance of this enum codifies the difference between executing a
+ * {@link #getSqlFormatString() particular PL/SQL expression} in Oracle and executing
+ * {@link String#toUpperCase(Locale)} for a {@link #getLocale() particular locale} in Java. These
+ * differences (also called exceptions) are expressed by the output of
+ * {@link #getUpperCaseExceptions()} and {@link #getUpperCaseExceptionMapping(char)}.
+ * <p>
+ * The tables are generated by testing a particular set of characters that are known to contain
+ * exceptions and {@link #toUpperCase(String) may be used} to compensate for exceptions found and
+ * generate output in Java that will be consistent with Oracle for the given (sql expression,
+ * locale) pair over all tested values.
+ * <p>
+ * Each constant is declared as (sql format string, language code, exceptional characters).
+ * <p>
+ * Characters tested:
+ * <ul>
+ * <li>U+0069 &#x0069;</li>
+ * <li>U+00df &#x00df;</li>
+ * <li>U+0386 &#x0386;</li>
+ * <li>U+0388 &#x0388;</li>
+ * <li>U+0389 &#x0389;</li>
+ * <li>U+038a &#x038a;</li>
+ * <li>U+038c &#x038c;</li>
+ * <li>U+038e &#x038e;</li>
+ * <li>U+038f &#x038f;</li>
+ * <li>U+03ac &#x03ac;</li>
+ * <li>U+03ad &#x03ad;</li>
+ * <li>U+03ae &#x03ae;</li>
+ * <li>U+03af &#x03af;</li>
+ * <li>U+03cc &#x03cc;</li>
+ * <li>U+03cd &#x03cd;</li>
+ * <li>U+03ce &#x03ce;</li>
+ * </ul>
+ *
+ * @see OracleUpper
+ */
+public enum OracleUpperTable {
+    ENGLISH("upper(%s)", "en", "ß"),
+    GERMAN("nls_upper(%s, 'nls_sort=xgerman')", "de", ""),
+    FRENCH("nls_upper(%s, 'nls_sort=xfrench')", "fr", "ß"),
+    ITALIAN("nls_upper(%s, 'nls_sort=italian')", "it", "ß"),
+    SPANISH("nls_upper(%s, 'nls_sort=spanish')", "es", "ß"),
+    CATALAN("nls_upper(%s, 'nls_sort=catalan')", "ca", "ß"),
+    DUTCH("nls_upper(%s, 'nls_sort=dutch')", "nl", "ß"),
+    PORTUGUESE("nls_upper(%s, 'nls_sort=west_european')", "pt", "ß"),
+    DANISH("nls_upper(%s, 'nls_sort=danish')", "da", "ß"),
+    NORWEGIAN("nls_upper(%s, 'nls_sort=norwegian')", "no", "ß"),
+    SWEDISH("nls_upper(%s, 'nls_sort=swedish')", "sv", "ß"),
+    FINNISH("nls_upper(%s, 'nls_sort=finnish')", "fi", "ß"),
+    CZECH("nls_upper(%s, 'nls_sort=xczech')", "cs", "ß"),
+    POLISH("nls_upper(%s, 'nls_sort=polish')", "pl", "ß"),
+    TURKISH("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "tr", 
"ß"),
+    CHINESE_HK("nls_upper(to_single_byte(%s), 'nls_sort=tchinese_radical_m')", 
"zh", ""),
+    CHINESE_HK_STROKE("nls_upper(to_single_byte(%s), 
'nls_sort=tchinese_stroke_m')", "zh", ""),
+    CHINESE_TW("nls_upper(to_single_byte(%s), 'nls_sort=tchinese_radical_m')", 
"zh", ""),
+    CHINESE_TW_STROKE("nls_upper(to_single_byte(%s), 
'nls_sort=tchinese_stroke_m')", "zh", ""),
+    CHINESE("nls_upper(to_single_byte(%s), 'nls_sort=schinese_radical_m')", 
"zh", ""),
+    CHINESE_STROKE("nls_upper(to_single_byte(%s), 
'nls_sort=schinese_stroke_m')", "zh", ""),
+    CHINESE_PINYIN("nls_upper(to_single_byte(%s), 
'nls_sort=schinese_pinyin_m')", "zh", ""),
+    JAPANESE("nls_upper(to_single_byte(%s), 'nls_sort=japanese_m')", "ja", ""),
+    KOREAN("nls_upper(to_single_byte(%s), 'nls_sort=korean_m')", "ko", ""),
+    RUSSIAN("nls_upper(%s, 'nls_sort=russian')", "ru", "ß"),
+    BULGARIAN("nls_upper(%s, 'nls_sort=bulgarian')", "bg", "ß"),
+    INDONESIAN("nls_upper(%s, 'nls_sort=indonesian')", "in", "ß"),
+    ROMANIAN("nls_upper(%s, 'nls_sort=romanian')", "ro", "ß"),
+    VIETNAMESE("nls_upper(%s, 'nls_sort=vietnamese')", "vi", "ß"),
+    UKRAINIAN("nls_upper(%s, 'nls_sort=ukrainian')", "uk", "ß"),
+    HUNGARIAN("nls_upper(%s, 'nls_sort=xhungarian')", "hu", ""),
+    GREEK("nls_upper(%s, 'nls_sort=greek')", "el", "ßΆΈΉΊΌΎΏάέήίόύώ"),
+    HEBREW("nls_upper(%s, 'nls_sort=hebrew')", "iw", "ß"),
+    SLOVAK("nls_upper(%s, 'nls_sort=slovak')", "sk", "ß"),
+    SERBIAN_CYRILLIC("nls_upper(%s, 'nls_sort=generic_m')", "sr", ""),
+    SERBIAN_LATIN("nls_upper(%s, 'nls_sort=xcroatian')", "sh", "ß"),
+    BOSNIAN("nls_upper(%s, 'nls_sort=xcroatian')", "bs", "ß"),
+    GEORGIAN("nls_upper(%s, 'nls_sort=binary')", "ka", "ß"),
+    BASQUE("nls_upper(%s, 'nls_sort=west_european')", "eu", "ß"),
+    MALTESE("nls_upper(%s, 'nls_sort=west_european')", "mt", "ß"),
+    ROMANSH("nls_upper(%s, 'nls_sort=west_european')", "rm", "ß"),
+    LUXEMBOURGISH("nls_upper(%s, 'nls_sort=west_european')", "lb", "ß"),
+    IRISH("nls_upper(%s, 'nls_sort=west_european')", "ga", "ß"),
+    SLOVENE("nls_upper(%s, 'nls_sort=xslovenian')", "sl", "ß"),
+    CROATIAN("nls_upper(%s, 'nls_sort=xcroatian')", "hr", "ß"),
+    MALAY("nls_upper(%s, 'nls_sort=malay')", "ms", "ß"),
+    ARABIC("nls_upper(%s, 'nls_sort=arabic')", "ar", "ß"),
+    ESTONIAN("nls_upper(%s, 'nls_sort=estonian')", "et", "ß"),
+    ICELANDIC("nls_upper(%s, 'nls_sort=icelandic')", "is", "ß"),
+    LATVIAN("nls_upper(%s, 'nls_sort=latvian')", "lv", "ß"),
+    LITHUANIAN("nls_upper(%s, 'nls_sort=lithuanian')", "lt", "ß"),
+    KYRGYZ("nls_upper(%s, 'nls_sort=binary')", "ky", "ß"),
+    KAZAKH("nls_upper(%s, 'nls_sort=binary')", "kk", "ß"),
+    TAJIK("nls_upper(%s, 'nls_sort=russian')", "tg", "ß"),
+    BELARUSIAN("nls_upper(%s, 'nls_sort=russian')", "be", "ß"),
+    TURKMEN("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "tk", 
"iß"),
+    AZERBAIJANI("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "az", 
"ß"),
+    ARMENIAN("nls_upper(%s, 'nls_sort=binary')", "hy", "ß"),
+    THAI("nls_upper(%s, 'nls_sort=thai_dictionary')", "th", "ß"),
+    HINDI("nls_upper(%s, 'nls_sort=binary')", "hi", "ß"),
+    URDU("nls_upper(%s, 'nls_sort=arabic')", "ur", "ß"),
+    BENGALI("nls_upper(%s, 'nls_sort=bengali')", "bn", "ß"),
+    TAMIL("nls_upper(%s, 'nls_sort=binary')", "ta", "ß"),
+    ESPERANTO("upper(%s)", "eo", ""),
+    XWEST_EUROPEAN("NLS_UPPER(%s,'NLS_SORT=xwest_european')", "en", "");
+
+    // Format string of the Oracle expression; %s stands for the expression being upper-cased.
+    private final String sql;
+    // Locale built from the constant's language code; handed to String.toUpperCase(Locale).
+    private final Locale locale;
+    // Characters for which the Java result is known to differ from the Oracle result.
+    private final char[] exceptionChars;
+
+    /**
+     * @param sql format string of the Oracle expression, with %s as the operand placeholder
+     * @param lang language code used to build this table's {@link Locale}
+     * @param exceptionChars the exceptional characters, concatenated into one string (empty if
+     *            there are none)
+     */
+    OracleUpperTable(String sql, String lang, String exceptionChars) {
+        this.sql = sql;
+        this.locale = new Locale(lang);
+        this.exceptionChars = exceptionChars.toCharArray();
+    }
+
+    /**
+     * Return an array containing characters for which Java's String.toUpperCase method is known
+     * to deviate from the result of Oracle evaluating {@link #getSql(String) this expression}.
+     * The internal array itself is returned, not a copy, so callers must not modify it.
+     *
+     * @return an array containing all exceptional characters.
+     */
+    final char[] getUpperCaseExceptions() {
+        return exceptionChars;
+    }
+
+    /**
+     * For a character, {@code exception}, contained in the String returned from
+     * {@link #getUpperCaseExceptions()}, this method returns the anticipated result of
+     * upper-casing the character in Oracle when evaluating
+     * {@link #getSql(String) this expression}.
+     *
+     * @param exception the exceptional character to map
+     * @return the upper case of {@code exception}, according to what Oracle would do.
+     * @throws IllegalArgumentException
+     *             if the character is not contained in the String returned by
+     *             {@link #getUpperCaseExceptions()}.
+     */
+    final String getUpperCaseExceptionMapping(char exception) {
+        // Outer switch: the exceptional character; inner switch: the table (enum constant) being
+        // queried. Any (character, table) pair without an entry falls through to the
+        // IllegalArgumentException at the end of the method.
+        // NOTE(review): the trailing comment on each return presumably records what Java's
+        // String.toUpperCase would have produced for that character -- confirm against the
+        // generator.
+        switch (exception) {
+        case 'i':
+            switch (this) {
+            case TURKMEN: return "İ"; // I
+            default: // fall out
+            }
+            break;
+        case 'ß':
+            switch (this) {
+            case ENGLISH: return "ß"; // SS
+            case FRENCH: return "ß"; // SS
+            case ITALIAN: return "ß"; // SS
+            case SPANISH: return "ß"; // SS
+            case CATALAN: return "ß"; // SS
+            case DUTCH: return "ß"; // SS
+            case PORTUGUESE: return "ß"; // SS
+            case DANISH: return "ß"; // SS
+            case NORWEGIAN: return "ß"; // SS
+            case SWEDISH: return "ß"; // SS
+            case FINNISH: return "ß"; // SS
+            case CZECH: return "ß"; // SS
+            case POLISH: return "ß"; // SS
+            case TURKISH: return "ß"; // SS
+            case RUSSIAN: return "ß"; // SS
+            case BULGARIAN: return "ß"; // SS
+            case INDONESIAN: return "ß"; // SS
+            case ROMANIAN: return "ß"; // SS
+            case VIETNAMESE: return "ß"; // SS
+            case UKRAINIAN: return "ß"; // SS
+            case GREEK: return "ß"; // SS
+            case HEBREW: return "ß"; // SS
+            case SLOVAK: return "ß"; // SS
+            case SERBIAN_LATIN: return "ß"; // SS
+            case BOSNIAN: return "ß"; // SS
+            case GEORGIAN: return "ß"; // SS
+            case BASQUE: return "ß"; // SS
+            case MALTESE: return "ß"; // SS
+            case ROMANSH: return "ß"; // SS
+            case LUXEMBOURGISH: return "ß"; // SS
+            case IRISH: return "ß"; // SS
+            case SLOVENE: return "ß"; // SS
+            case CROATIAN: return "ß"; // SS
+            case MALAY: return "ß"; // SS
+            case ARABIC: return "ß"; // SS
+            case ESTONIAN: return "ß"; // SS
+            case ICELANDIC: return "ß"; // SS
+            case LATVIAN: return "ß"; // SS
+            case LITHUANIAN: return "ß"; // SS
+            case KYRGYZ: return "ß"; // SS
+            case KAZAKH: return "ß"; // SS
+            case TAJIK: return "ß"; // SS
+            case BELARUSIAN: return "ß"; // SS
+            case TURKMEN: return "ß"; // SS
+            case AZERBAIJANI: return "ß"; // SS
+            case ARMENIAN: return "ß"; // SS
+            case THAI: return "ß"; // SS
+            case HINDI: return "ß"; // SS
+            case URDU: return "ß"; // SS
+            case BENGALI: return "ß"; // SS
+            case TAMIL: return "ß"; // SS
+            default: // fall out
+            }
+            break;
+        case 'Ά':
+            switch (this) {
+            case GREEK: return "Α"; // Ά
+            default: // fall out
+            }
+            break;
+        case 'Έ':
+            switch (this) {
+            case GREEK: return "Ε"; // Έ
+            default: // fall out
+            }
+            break;
+        case 'Ή':
+            switch (this) {
+            case GREEK: return "Η"; // Ή
+            default: // fall out
+            }
+            break;
+        case 'Ί':
+            switch (this) {
+            case GREEK: return "Ι"; // Ί
+            default: // fall out
+            }
+            break;
+        case 'Ό':
+            switch (this) {
+            case GREEK: return "Ο"; // Ό
+            default: // fall out
+            }
+            break;
+        case 'Ύ':
+            switch (this) {
+            case GREEK: return "Υ"; // Ύ
+            default: // fall out
+            }
+            break;
+        case 'Ώ':
+            switch (this) {
+            case GREEK: return "Ω"; // Ώ
+            default: // fall out
+            }
+            break;
+        case 'ά':
+            switch (this) {
+            case GREEK: return "Α"; // Ά
+            default: // fall out
+            }
+            break;
+        case 'έ':
+            switch (this) {
+            case GREEK: return "Ε"; // Έ
+            default: // fall out
+            }
+            break;
+        case 'ή':
+            switch (this) {
+            case GREEK: return "Η"; // Ή
+            default: // fall out
+            }
+            break;
+        case 'ί':
+            switch (this) {
+            case GREEK: return "Ι"; // Ί
+            default: // fall out
+            }
+            break;
+        case 'ό':
+            switch (this) {
+            case GREEK: return "Ο"; // Ό
+            default: // fall out
+            }
+            break;
+        case 'ύ':
+            switch (this) {
+            case GREEK: return "Υ"; // Ύ
+            default: // fall out
+            }
+            break;
+        case 'ώ':
+            switch (this) {
+            case GREEK: return "Ω"; // Ώ
+            default: // fall out
+            }
+            break;
+        }
+        throw new IllegalArgumentException(
+                "No upper case mapping for char=" + exception
+                        + " and this=" + this);
+    }
+
+    /**
+     * @return the locale built from this table's language code
+     */
+    @SuppressWarnings(value = "EI_EXPOSE_REP", justification = "By design.")
+    public final Locale getLocale() {
+        return locale;
+    }
+
+    /**
+     * @return the raw format string of the Oracle expression, with %s as the operand placeholder
+     */
+    public String getSqlFormatString() {
+        return sql;
+    }
+
+    /**
+     * Substitutes {@code expr} into the {@link #getSqlFormatString() format string} via
+     * {@link String#format(String, Object...)}.
+     *
+     * @param expr the text to insert as the operand
+     * @return the formatted Oracle expression
+     */
+    public String getSql(String expr) {
+        return String.format(sql, expr);
+    }
+
+    /**
+     * Upper-cases {@code value} by delegating to
+     * {@link OracleUpper#toUpperCase(OracleUpperTable, String)} with this table.
+     *
+     * @param value the string to upper-case
+     * @return the upper-cased string
+     */
+    public String toUpperCase(String value) {
+        return OracleUpper.toUpperCase(this, value);
+    }
+
+    /**
+     * Looks up the table whose enum constant name equals {@code sort}, via
+     * {@link Enum#valueOf(Class, String)}.
+     *
+     * @param sort the exact name of an enum constant of this type
+     * @return the matching table
+     * @throws IllegalArgumentException if no constant has that name
+     */
+    public static OracleUpperTable forLinguisticSort(String sort) {
+        return Enum.valueOf(OracleUpperTable.class, sort);
+    }
+}
+
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/package-info.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/package-info.java
new file mode 100644
index 0000000000..3878a7c082
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/i18n/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * This package contains utility classes partially copied from Salesforce's
+ * internationalization utility library (com.salesforce.i18n:i18n-util:1.0.4), 
which was
+ * released under the 3-clause BSD License.
+ *
+ * The i18n-util library is not maintained anymore, and it was using 
vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ */
+package org.apache.phoenix.util.i18n;
diff --git 
a/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/LinguisticSortTest.java
 
b/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/LinguisticSortTest.java
new file mode 100644
index 0000000000..7603b4d5b7
--- /dev/null
+++ 
b/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/LinguisticSortTest.java
@@ -0,0 +1,650 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import static org.apache.phoenix.util.i18n.LinguisticSort.AZERBAIJANI;
+import static org.apache.phoenix.util.i18n.LinguisticSort.BASQUE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.BENGALI;
+import static org.apache.phoenix.util.i18n.LinguisticSort.BOSNIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.BULGARIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CATALAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CHINESE_HK;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CHINESE_HK_STROKE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CHINESE_TW;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CHINESE_TW_STROKE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.CROATIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.ESTONIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.FINNISH;
+import static org.apache.phoenix.util.i18n.LinguisticSort.HUNGARIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.ICELANDIC;
+import static org.apache.phoenix.util.i18n.LinguisticSort.JAPANESE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.KOREAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.LATVIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.LITHUANIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.ROMANIAN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.SERBIAN_LATIN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.SLOVAK;
+import static org.apache.phoenix.util.i18n.LinguisticSort.SLOVENE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.TAJIK;
+import static org.apache.phoenix.util.i18n.LinguisticSort.TURKISH;
+import static org.apache.phoenix.util.i18n.LinguisticSort.TURKMEN;
+import static org.apache.phoenix.util.i18n.LinguisticSort.VIETNAMESE;
+import static org.apache.phoenix.util.i18n.LinguisticSort.LUXEMBOURGISH;
+import static org.apache.phoenix.util.i18n.LinguisticSort.URDU;
+import static org.apache.phoenix.util.i18n.LinguisticSort.TAMIL;
+import static org.apache.phoenix.util.i18n.LinguisticSort.ESPERANTO;
+
+import com.ibm.icu.text.Normalizer2;
+
+import java.text.CollationKey;
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.phoenix.thirdparty.com.google.common.collect.ImmutableList;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Ordering;
+
+import junit.framework.TestCase;
+
+/**
+ * This test class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * This could be expanded significantly.
+ */
+public class LinguisticSortTest extends TestCase {
+
+    /**
+     * Creates the test case, passing {@code name} straight through to the
+     * {@link TestCase} constructor.
+     *
+     * @param name the name handed to the JUnit {@code TestCase} superclass
+     */
+    public LinguisticSortTest(String name) {
+        super(name);
+    }
+
+    /**
+     * Basic sanity check of Thai collation: both the non-caching comparator and the
+     * comparator returned by {@code getComparator(16)} must sort the sample strings
+     * into the same expected order.
+     */
+    public void testThaiBasicSorting() {
+        final LinguisticSort thai = LinguisticSort.get(new Locale("th"));
+
+        final ImmutableList<String> input =
+                ImmutableList.of("azw", "Ac", "ab", "21", "zaa", "b\u0e40k", "bk");
+        final ImmutableList<String> expected =
+                ImmutableList.of("21", "ab", "Ac", "azw", "bk", "b\u0e40k", "zaa");
+
+        final List<String> viaNonCaching =
+                Ordering.from(thai.getNonCachingComparator()).sortedCopy(input);
+        assertEquals(expected, viaNonCaching);
+
+        final List<String> viaComparator =
+                Ordering.from(thai.getComparator(16)).sortedCopy(input);
+        assertEquals(expected, viaComparator);
+    }
+
+    /**
+     * Regression test for the Thai leading vowels U+0E40..U+0E44, which made the JDK 6
+     * collator run forever or exhaust memory
+     * (see http://bugs.sun.com/view_bug.do?bug_id=5047314).
+     */
+    public void testThaiCharactersOfDeath() {
+        // The original bug report: comparing any one of U+0E40..U+0E44 with itself
+        // never returned on JDK 6.
+        final String leadingVowel = "\u0e40";
+        final Collator jdkThai = Collator.getInstance(new Locale("th"));
+        System.out.println(jdkThai.compare(leadingVowel, leadingVowel));
+
+        // The "real" test: collation keys must report the unmodified input as source.
+        final Collator thaiCollator = LinguisticSort.get(new Locale("th")).getCollator();
+
+        final String[] inputs = {
+                "\u0e3f", "\u0e45", "\u0e40k", "\u0e44", "\u0e43", "\u0e42", "\u0e41", "\u0e40"
+        };
+        // A deprecated patched collator appended a space after a problematic character at
+        // the end of a string (an unpatched collator would OOM on these strings). The
+        // expected source strings are now simply the inputs themselves.
+        final String[] expectedSources = inputs;
+
+        int index = 0;
+        for (String input : inputs) {
+            final CollationKey key = thaiCollator.getCollationKey(input);
+            assertEquals("string #" + index, expectedSources[index], key.getSourceString());
+            index++;
+        }
+    }
+
+    /**
+     * Verifies the rolodex bucket index that {@code getRolodexIndexForChar} reports for
+     * sample characters under the English, Spanish, Japanese, Malay and Thai sorts,
+     * including characters expected to fall into the trailing "other" bucket.
+     */
+    public void testRolodexIndexByChar() throws Exception{
+        // English: accented letters are expected in the bucket of their base letter.
+        final LinguisticSort english = LinguisticSort.ENGLISH;
+        assertEquals(0, english.getRolodexIndexForChar("a"));
+        assertEquals(0, english.getRolodexIndexForChar("Á"));
+        assertEquals(1, english.getRolodexIndexForChar("b"));
+        assertEquals(13, english.getRolodexIndexForChar("N"));
+        assertEquals(13, english.getRolodexIndexForChar("Ñ"));
+        assertEquals(25, english.getRolodexIndexForChar("z"));
+        // The first element below is the Cyrillic А, not the Latin A.
+        assertOther(Arrays.asList("А", "こ"), english);
+
+        // Spanish: Ñ has a bucket of its own, shifting every later letter by one.
+        final LinguisticSort spanish = LinguisticSort.SPANISH;
+        assertEquals(0, spanish.getRolodexIndexForChar("a"));
+        assertEquals(0, spanish.getRolodexIndexForChar("Á"));
+        assertEquals(1, spanish.getRolodexIndexForChar("b"));
+        assertEquals(13, spanish.getRolodexIndexForChar("N"));
+        assertEquals(14, spanish.getRolodexIndexForChar("Ñ"));
+        assertEquals(26, spanish.getRolodexIndexForChar("z"));
+        // The first element below is the Cyrillic А, not the Latin A.
+        assertOther(Arrays.asList("А", "こ"), spanish);
+
+        // Japanese: Latin letters first, followed by kana buckets.
+        final LinguisticSort japanese = LinguisticSort.JAPANESE;
+        assertEquals(0, japanese.getRolodexIndexForChar("a"));
+        assertEquals(0, japanese.getRolodexIndexForChar("Á"));
+        assertEquals(1, japanese.getRolodexIndexForChar("b"));
+        assertEquals(13, japanese.getRolodexIndexForChar("N"));
+        assertEquals(13, japanese.getRolodexIndexForChar("Ñ"));
+        assertEquals(25, japanese.getRolodexIndexForChar("z"));
+        assertEquals(27, japanese.getRolodexIndexForChar("こ"));
+        assertEquals(27, japanese.getRolodexIndexForChar("く"));
+        assertEquals(31, japanese.getRolodexIndexForChar("ふ"));
+        // NOTE(review): this check sits in the Japanese section but exercises the Spanish
+        // sort; possibly the Japanese sort was intended -- confirm before changing.
+        assertOther(Arrays.asList("\u0410"), spanish); // А (Cyrillic)
+
+        // Malay has a rolodex
+        final LinguisticSort malay = LinguisticSort.MALAY;
+        assertEquals(0, malay.getRolodexIndexForChar("a"));
+        assertEquals(25, malay.getRolodexIndexForChar("z"));
+        assertOther(Arrays.asList("\u0410", "\u304f"), malay);  // "А", "く"
+
+        // Thai has a rolodex, all of these should be "other"
+        // (Thai has 44 chars, so other is 46)
+        final LinguisticSort thai = LinguisticSort.THAI;
+        assertConstant(Arrays.asList("A", "Á", "b", "\u304f", "\u0410"),
+                thai, 46, "had a rolodex index.");
+    }
+
+    /**
+     * Checks that the alphabet of every {@link LinguisticSort} matches the alphabet
+     * returned by {@code LinguisticSort.getAlphabetFromICU} for the sort's locale.
+     * Sorts listed in {@code knownDifferences} are skipped.
+     */
+    public void testRolodexComparedToIcu() {
+        final Set<LinguisticSort> knownDifferences = EnumSet.of(
+                CATALAN, FINNISH, TURKISH, CHINESE_HK, CHINESE_HK_STROKE, CHINESE_TW,
+                CHINESE_TW_STROKE, JAPANESE, KOREAN, BULGARIAN, ROMANIAN, VIETNAMESE,
+                HUNGARIAN, SLOVAK, SERBIAN_LATIN, BOSNIAN, BASQUE, LUXEMBOURGISH, SLOVENE,
+                CROATIAN, ESTONIAN, ICELANDIC, LATVIAN, LITHUANIAN, TAJIK, TURKMEN, AZERBAIJANI,
+                URDU, BENGALI, TAMIL, ESPERANTO);
+
+        for (LinguisticSort sort : LinguisticSort.values()) {
+            if (knownDifferences.contains(sort)) {
+                continue;
+            }
+
+            final String alphaAsString = Arrays.toString(sort.getAlphabet());
+            final String icuAlphaAsString =
+                    Arrays.toString(LinguisticSort.getAlphabetFromICU(sort.getLocale()));
+
+            // assertEquals already reports both alphabets on mismatch; the former
+            // follow-up println could never run because a mismatch throws first.
+            assertEquals("LinguisticSort for " + sort + " doesn't match",
+                    icuAlphaAsString, alphaAsString);
+        }
+    }
+
+    /**
+     * Asserts that every string in {@code chars} gets the rolodex index equal to the
+     * sort's alphabet length, i.e. the "Other" category.
+     */
+    private void assertOther(Collection<String> chars, LinguisticSort sort){
+        final int otherIndex = sort.getAlphabetLength();
+        assertConstant(chars, sort, otherIndex, "wasn't in 'Other' category");
+    }
+
+    /**
+     * Asserts that every string in {@code chars} has rolodex index {@code constant}
+     * under {@code sort}; {@code message} is appended to the offending string in the
+     * failure text.
+     */
+    private void assertConstant(Collection<String> chars, LinguisticSort sort,
+                                int constant, String message) {
+        for (String candidate : chars) {
+            final int actualIndex = sort.getRolodexIndexForChar(candidate);
+            assertEquals(candidate + " " + message, constant, actualIndex);
+        }
+    }
+
+    /**
+     * Makes sure the upper case collator behaves the same as upper-casing the strings
+     * first and collating afterwards, once for ascii-only input and once for general input.
+     */
+    public void testUpperCaseCollator() {
+        // bump these up for performance testing
+        final int repeatTimes = 1;
+        final int testSize = 1000;
+
+        for (boolean asciiOnly : new boolean[] { true, false }) {
+            testUpperCaseCollator(asciiOnly, repeatTimes, testSize);
+        }
+    }
+
+    /**
+     * Implementation of the testUpperCaseCollator that allows breaking out an ascii only
+     * test from a general string test.
+     * <p>
+     * Each string is compared with its successor (the last one wraps around to the first)
+     * in three ways: upper-casing both and using the plain collator, using the upper case
+     * collator directly, and comparing upper case collation keys. Timings are printed and
+     * results are verified on the last repetition only.
+     *
+     * @param asciiOnly whether the randomly generated strings are restricted to ASCII
+     * @param repeatTimes number of times the whole measurement is repeated
+     * @param testSize number of strings generated per repetition
+     */
+    private void testUpperCaseCollator(boolean asciiOnly, int repeatTimes, int testSize) {
+        // Hand-picked inputs that occupy the first slots; the rest is random filler.
+        final String[] fixedInputs = {
+                "abß", "abSS", "abß", "ffo", "ﬃ", "FFI", "fred", "FRED", "FREE", "剫", "뻎",
+                "\u1fe3", "\u05d7", "\u1fd3", "\u1441", "\ub9fe", "\u0398", "\u0399", "\u039a",
+                "\u4371", "\ufb06"
+        };
+
+        final LinguisticSort sort = LinguisticSort.ENGLISH;
+        final Collator plainCollator = sort.getCollator();
+        final Collator upperCollator = sort.getUpperCaseCollator(false);
+
+        final Random random = new Random();
+        final int maxLength = 100;
+        final String reportPrefix =
+                "Compared " + testSize + " " + (asciiOnly ? "ascii " : "") + "strings with ";
+
+        for (int round = 0; round < repeatTimes; round++) {
+            final boolean lastTime = round == repeatTimes - 1;
+
+            final String[] originals = new String[testSize];
+            for (int i = 0; i < testSize; i++) {
+                originals[i] = i < fixedInputs.length
+                        ? fixedInputs[i]
+                        : randomString(random, maxLength, asciiOnly);
+            }
+
+            // Reference result: upper-case both strings, then use the plain collator.
+            final int[] upperResults = new int[testSize];
+            long start = System.currentTimeMillis();
+            for (int i = 0; i < testSize; i++) {
+                final int next = (i + 1) % testSize;
+                final String left = sort.getUpperCaseValue(originals[i], false);
+                final String right = sort.getUpperCaseValue(originals[next], false);
+                upperResults[i] = plainCollator.compare(left, right);
+            }
+            if (lastTime) {
+                final long time = System.currentTimeMillis() - start;
+                System.out.println(reportPrefix + "upper casing in " + time + "ms");
+            }
+
+            // Candidate 1: the upper case collator applied to the raw strings.
+            final int[] caseResults = new int[testSize];
+            start = System.currentTimeMillis();
+            for (int i = 0; i < testSize; i++) {
+                final int next = (i + 1) % testSize;
+                caseResults[i] = upperCollator.compare(originals[i], originals[next]);
+            }
+            if (lastTime) {
+                final long time = System.currentTimeMillis() - start;
+                System.out.println(
+                        reportPrefix + "upper case collator comparison in " + time + "ms");
+            }
+
+            // Candidate 2: collation keys produced by the upper case collator.
+            final int[] keyResults = new int[testSize];
+            start = System.currentTimeMillis();
+            for (int i = 0; i < testSize; i++) {
+                final int next = (i + 1) % testSize;
+                final CollationKey leftKey = upperCollator.getCollationKey(originals[i]);
+                final CollationKey rightKey = upperCollator.getCollationKey(originals[next]);
+                keyResults[i] = leftKey.compareTo(rightKey);
+            }
+            if (lastTime) {
+                final long time = System.currentTimeMillis() - start;
+                System.out.println(reportPrefix + "collation keys in " + time + "ms");
+            }
+
+            if (!lastTime) {
+                continue;
+            }
+            System.out.println();
+
+            // normalizing helps see why strings don't compare the same when upper-cased
+            final Normalizer2 normalizer = Normalizer2.getNFKDInstance();
+            for (int i = 0; i < testSize; i++) {
+                final int next = (i + 1) % testSize;
+                final boolean caseOk = upperResults[i] == caseResults[i];
+                final boolean keyOk = upperResults[i] == keyResults[i];
+                if (caseOk && keyOk) {
+                    continue;
+                }
+
+                final StringBuilder message = new StringBuilder();
+                message.append("Did not get expected result when comparing string ")
+                        .append(i).append(" ")
+                        .append(caseOk ? "" : "using upper case collator comparison ")
+                        .append(caseOk || keyOk ? "" : "or ")
+                        .append(keyOk ? "" : "using collation key comparison ")
+                        .append("\n");
+                message.append("'").append(escape(originals[i])).append("'\n")
+                        .append("(")
+                        .append(escape(sort.getUpperCaseValue(originals[i], false)))
+                        .append(")\n")
+                        .append("<").append(escape(normalizer.normalize(originals[i])))
+                        .append("> ");
+                message.append("with string ").append(next).append(" \n")
+                        .append("'").append(escape(originals[next])).append("'\n")
+                        .append("(")
+                        .append(escape(sort.getUpperCaseValue(originals[next], false)))
+                        .append(")\n ")
+                        .append("<").append(escape(normalizer.normalize(originals[next])))
+                        .append(">");
+
+                // NOTE(review): only the upper case collator result is asserted here; a
+                // mismatch that affects just the collation keys builds a message but does
+                // not fail the test -- confirm whether keyResults should be asserted too.
+                assertEquals(message.toString(), upperResults[i], caseResults[i]);
+            }
+        }
+    }
+
+    /**
+     * For diagnosis of mismatched strings: renders {@code string} code point by code
+     * point, escaping non-printable and non-ascii characters via
+     * {@link #escapeCodePoint(StringBuilder, int)}.
+     */
+    private String escape(String string) {
+        final int length = string.length();
+        final StringBuilder escaped = new StringBuilder(length * 2);
+        for (int offset = 0; offset < length; ) {
+            final int codePoint = string.codePointAt(offset);
+            escapeCodePoint(escaped, codePoint);
+            // advance by one or two chars depending on whether this was a surrogate pair
+            offset += Character.charCount(codePoint);
+        }
+        return escaped.toString();
+    }
+
+    /**
+     * Escapes a single code point so that non-ascii and non-printable characters use
+     * their standard Java escape.
+     * <p>
+     * Printable ASCII (0x20..0x7E) is appended unchanged, apart from the double quote and
+     * the backslash, which are backslash-escaped. Everything else is written as
+     * backslash-u escapes with four hex digits; a supplementary code point is written as
+     * two such escapes, one per UTF-16 code unit, because a Java unicode escape always
+     * has exactly four hex digits.
+     *
+     * @param sb the builder receiving the escaped text
+     * @param ch the code point to escape
+     */
+    private void escapeCodePoint(final StringBuilder sb, final int ch) {
+        switch(ch) {
+            case '\b' : sb.append("\\b");
+                break;
+            case '\t' : sb.append("\\t");
+                break;
+            case '\n' : sb.append("\\n");
+                break;
+            case '\r' : sb.append("\\r");
+                break;
+            case '\f' : sb.append("\\f");
+                break;
+            case '\"' : sb.append("\\\"");
+                break;
+            case '\\' : sb.append("\\\\");
+                break;
+            default:
+                if (ch >= 0x20 && ch <= 0x7E) {
+                    sb.appendCodePoint(ch);
+                } else {
+                    // One four-digit escape per UTF-16 unit keeps the output a valid
+                    // Java escape sequence even above U+FFFF.
+                    for (char unit : Character.toChars(ch)) {
+                        sb.append(String.format("\\u%04x", (int) unit));
+                    }
+                }
+        }
+    }
+
+    /**
+     * Generates a random string whose length is drawn from {@code r.nextInt(maxLength)},
+     * i.e. from 0 (inclusive) up to {@code maxLength} (exclusive).
+     */
+    private String randomString(Random r, int maxLength, boolean asciiOnly) {
+        return randomFixedLengthString(r, r.nextInt(maxLength), asciiOnly);
+    }
+
+
+    /**
+     * Generates a random string of exactly {@code length} chars. Every char is redrawn
+     * until it is a defined, non-ISO-control character; with {@code asciiOnly} the draw
+     * is limited to the range 0..127.
+     */
+    private String randomFixedLengthString(Random r, int length, boolean asciiOnly) {
+        final StringBuilder generated = new StringBuilder();
+        for (int produced = 0; produced < length; produced++) {
+            char candidate;
+            do {
+                candidate = (char)(asciiOnly ? r.nextInt(128) : r.nextInt());
+            } while (!Character.isDefined(candidate) || Character.isISOControl(candidate));
+            generated.append(candidate);
+        }
+        return generated.toString();
+    }
+
+    /**
+     * Checks the upper-casing exceptions: under the English sort the sharp s is left
+     * as is while the surrounding letters are upper-cased, and under the Greek sort
+     * both sample inputs are expected to yield the plain capital omicron.
+     */
+    public void testUpperCaseExceptionChars() {
+        // Sharp s in English -- each row is { input, expected output }
+        final String[][] englishCases = {
+                { "ß", "ß" },
+                { "ßß", "ßß" },
+                { "ßßß", "ßßß" },
+                { "aß", "Aß" },
+                { "aaaß", "AAAß" },
+                { "ßa", "ßA" },
+                { "ßaaa", "ßAAA" },
+                { "aßb", "AßB" },
+                { "aaaßbbb", "AAAßBBB" },
+                { "ßaß", "ßAß" },
+                { "ßaaaß", "ßAAAß" },
+                { "aßbßc", "AßBßC" },
+                { "aaaßbbbßccc", "AAAßBBBßCCC" },
+                { "aßßc", "AßßC" },
+                { "aaaßßccc", "AAAßßCCC" },
+        };
+        for (String[] row : englishCases) {
+            final String input = row[0];
+            final String expected = row[1];
+            assertEquals(expected, LinguisticSort.ENGLISH.getUpperCaseValue(input, false));
+        }
+
+        // Omicron in Greek -- each row is { input, expected output }
+        final String[][] greekCases = {
+                { "\u039f", "\u039f" }, // capital omicron
+                { "Ό", "\u039f" },
+        };
+        for (String[] row : greekCases) {
+            final String input = row[0];
+            final String expected = row[1];
+            assertEquals(expected, LinguisticSort.GREEK.getUpperCaseValue(input, false));
+        }
+    }
+
+    /**
+     * Checks {@code usesUpperToGetUpperCase(false)}: expected to be true for English
+     * and Esperanto and false for German.
+     */
+    public void testUsesUpper() {
+        final boolean english = LinguisticSort.ENGLISH.usesUpperToGetUpperCase(false);
+        assertTrue(english);
+        final boolean esperanto = LinguisticSort.ESPERANTO.usesUpperToGetUpperCase(false);
+        assertTrue(esperanto);
+        final boolean german = LinguisticSort.GERMAN.usesUpperToGetUpperCase(false);
+        assertFalse(german);
+    }
+
+    /**
+     * For the English sort, the upper case SQL and the upper collation key SQL generated
+     * for the same expression must be equal.
+     */
+    public void testGetUpperCaseCollationKey() {
+        final LinguisticSort english = LinguisticSort.ENGLISH;
+        final Object upperCaseSql = english.getUpperCaseSql("x", false);
+        final Object collationKeySql = english.getUpperCollationKeySql("x", false);
+        assertEquals(upperCaseSql, collationKeySql);
+    }
+
+    /**
+     * Measures the perf impact of the special-case logic in the EN locale for the German
+     * sharp s, &#x00df;. Rename this test (remove the leading _) to run it, e.g. in Eclipse.
+     * <p>
+     * Two banks of 1000 randomish strings are generated, one with sharp s and one without.
+     * Each bank is then pushed through timed runs of 1 million uppercase operations with
+     * the EN locale (which has the special-case logic), with EO, Esperanto (a test locale
+     * without any special-case logic), and with Greek.
+     * <p>
+     * For posterity, results recorded on the original author's machine
+     * (averages rounded to nearest 10ms):
+     * <p>
+     * <table>
+     * <tr><td></td><td>ENGLISH</td><td>ESPERANTO</td><td>GREEK</td></tr>
+     * <tr><td>with sharp s</td><td>330ms</td><td>260ms</td><td>370ms</td></tr>
+     * <tr><td>without sharp s</td><td>150ms</td><td>130ms</td><td>213ms</td></tr>
+     * </table>
+     */
+    public void _testUpperCasePerf() {
+        final String[] withSharpS = genStrings(1000, true);
+        final String[] withoutSharpS = genStrings(1000, false);
+
+        // GREEK is interesting for having a lot of exceptions.
+        final String[] labels = { "ENGLISH", "ESPERANTO", "GREEK" };
+        final LinguisticSort[] sorts = {
+                LinguisticSort.ENGLISH, LinguisticSort.ESPERANTO, LinguisticSort.GREEK
+        };
+
+        for (int k = 0; k < sorts.length; k++) {
+            System.out.println(labels[k] + ", with ß:");
+            runUpperCase(sorts[k], withSharpS);
+            System.out.println(labels[k] + ", without ß:");
+            runUpperCase(sorts[k], withoutSharpS);
+        }
+    }
+
+    /**
+     * Upper-cases the given inputs in a round-robin fashion: 10000 warm-up calls followed
+     * by three timed runs of 1000000 calls each, printing the elapsed time of every run.
+     */
+    private void runUpperCase(LinguisticSort sort, String[] inputs) {
+        final int bankSize = inputs.length;
+
+        // Warm up
+        for (int warmUp = 0; warmUp < 10000; warmUp++) {
+            sort.getUpperCaseValue(inputs[warmUp % bankSize], false);
+        }
+
+        // Run experiment
+        for (int run = 1; run <= 3; run++) {
+            final long start = System.currentTimeMillis();
+            for (int op = 0; op < 1000000; op++) {
+                sort.getUpperCaseValue(inputs[op % bankSize], false);
+            }
+            final long elapsed = System.currentTimeMillis() - start;
+
+            System.out.println("[" + run + "] Complete in " + elapsed + "ms.");
+        }
+    }
+
+    /**
+     * Returns n randomly generated strings, each containing at least
+     * one sharp s if useSharpS is true.
+     * <p>
+     * Every string is a random head, optionally a random middle part, and a random tail;
+     * when useSharpS is set a sharp s follows the head and the middle part.
+     */
+    private String[] genStrings(int n, boolean useSharpS) {
+        final Random random = new Random();
+        final String sharpS = useSharpS ? "ß" : "";
+
+        final String[] generated = new String[n];
+        for (int i = 0; i < generated.length; i++) {
+            final StringBuilder sb = new StringBuilder();
+            sb.append(randomString(random, random.nextInt(12) + 1, random.nextBoolean()));
+            sb.append(sharpS);
+            if (random.nextBoolean()) {
+                sb.append(randomString(random, random.nextInt(12) + 1, random.nextBoolean()));
+                sb.append(sharpS);
+            }
+            sb.append(randomString(random, random.nextInt(12) + 1, random.nextBoolean()));
+            generated[i] = sb.toString();
+
+            // NOTE(review): the non-ascii random draw looks able to produce a sharp s by
+            // chance, which would trip this sanity check -- confirm whether that matters.
+            if (!useSharpS) {
+                assertFalse(generated[i].contains("ß"));
+            }
+        }
+        return generated;
+    }
+
+    /**
+     * Returns a sorted copy of {@code source}, ordered by the collator of {@code sort};
+     * the source list itself is not modified.
+     */
+    private List<String> cloneAndSort(LinguisticSort sort, List<String> source) {
+        final List<String> sortedCopy = new ArrayList<>(source);
+        Collections.sort(sortedCopy, sort.getCollator());
+        return sortedCopy;
+    }
+
+    /**
+     * Validate that the sorting of the linguistic sorts for various locales is "correct"
+     * The toSort below is in this order.
+     * 阿嗄阾啊  : āáǎa
+     * 仈㶚 : bā bà
+     * 齑: ji
+     * <p>
+     * The plain, HK and TW sorts share one expected order, the three stroke sorts share
+     * another, and the pinyin sort has its own.
+     */
+    public void testChineseSorting() {
+        final List<String> toSort = ImmutableList.of(
+                "\u963f", "\u55c4", "\u963e", "\u554a", "\u4ec8", "\u3d9a", "\u9f51");
+
+        final List<String> defaultOrder = ImmutableList.of(
+                "\u4ec8", "\u554a", "\u55c4", "\u3d9a", "\u963e", "\u963f", "\u9f51");
+        assertEquals(defaultOrder, cloneAndSort(LinguisticSort.CHINESE, toSort));
+        assertEquals(defaultOrder, cloneAndSort(LinguisticSort.CHINESE_HK, toSort));
+        assertEquals(defaultOrder, cloneAndSort(LinguisticSort.CHINESE_TW, toSort));
+
+        final List<String> strokeOrder = ImmutableList.of(
+                "\u4ec8", "\u963e", "\u963f", "\u554a", "\u55c4", "\u9f51", "\u3d9a");
+        assertEquals(strokeOrder, cloneAndSort(LinguisticSort.CHINESE_STROKE, toSort));
+        assertEquals(strokeOrder, cloneAndSort(LinguisticSort.CHINESE_HK_STROKE, toSort));
+        assertEquals(strokeOrder, cloneAndSort(LinguisticSort.CHINESE_TW_STROKE, toSort));
+
+        final List<String> pinyinOrder = ImmutableList.of(
+                "\u963f", "\u55c4", "\u554a", "\u4ec8", "\u9f51", "\u963e", "\u3d9a");
+        assertEquals(pinyinOrder, cloneAndSort(LinguisticSort.CHINESE_PINYIN, toSort));
+    }
+
+    /**
+     * Checks which {@link LinguisticSort} is returned by {@code LinguisticSort.get} for
+     * Chinese locales with different country and variant combinations.
+     */
+    public void testChineseLocaleMapping() {
+        // Each row is { expected sort, locale to look up }.
+        final Object[][] expectations = {
+                { LinguisticSort.CHINESE, new Locale("zh") },
+                { LinguisticSort.CHINESE_TW, new Locale("zh", "TW") },
+                { LinguisticSort.CHINESE, new Locale("zh", "SG") },
+                { LinguisticSort.CHINESE_HK, new Locale("zh", "HK") },
+                { LinguisticSort.CHINESE_TW_STROKE, new Locale("zh", "TW", "STROKE") },
+                { LinguisticSort.CHINESE_HK_STROKE, new Locale("zh", "HK", "STROKE") },
+                { LinguisticSort.CHINESE_STROKE, new Locale("zh", "CN", "STROKE") },
+                { LinguisticSort.CHINESE_STROKE, new Locale("zh", "SG", "STROKE") },
+                { LinguisticSort.CHINESE_STROKE, new Locale("zh", "", "STROKE") },
+                { LinguisticSort.CHINESE_PINYIN, new Locale("zh", "CN", "PINYIN") },
+                { LinguisticSort.CHINESE_PINYIN, new Locale("zh", "SG", "PINYIN") },
+                { LinguisticSort.CHINESE_PINYIN, new Locale("zh", "", "PINYIN") },
+        };
+
+        for (Object[] row : expectations) {
+            final Locale locale = (Locale) row[1];
+            assertEquals(row[0], LinguisticSort.get(locale));
+        }
+    }
+}
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/OracleUpperTableGeneratorTest.java b/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/OracleUpperTableGeneratorTest.java
new file mode 100644
index 0000000000..2e101cf78d
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/util/i18n/OracleUpperTableGeneratorTest.java
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util.i18n;
+
+import junit.framework.TestCase;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.Locale;
+
+/**
+ * This test class was partially copied from Salesforce's internationalization utility library
+ * (com.salesforce.i18n:i18n-util:1.0.4), which was released under the 3-clause BSD License.
+ * The i18n-util library is not maintained anymore, and it was using vulnerable dependencies.
+ * For more info, see: https://issues.apache.org/jira/browse/PHOENIX-6818
+ *
+ * A generator for OracleUpperTable.java. This generator creates an OracleUpperTable for each
+ * of a number of {@link UpperExpr PL/SQL expressions}, which simply tabulates the differences
+ * between Oracle's and Java's upper-casing, allowing them to be compensated for.
+ * <p>
+ * May be run as a JUnit test or as a stand-alone Java application. Run the output in Oracle
+ * to generate the source for OracleUpperTable.java.
+ *
+ * @see OracleUpper
+ * @see OracleUpperTable
+ */
+public class OracleUpperTableGeneratorTest extends TestCase {
+
+    private static final char[] charsToTest = new char[] {
+            // i may be messed up for Turkic languages where it's supposed to upper-case
+            // to dotted I.
+            'i',
+            // Sharp s may upper-case to SS or itself, depending on the details.
+            'ß',
+            // Oracle removes tonos from all of these when upper-casing.
+            'Ά', 'Έ', 'Ή', 'Ί', 'Ό', 'Ύ','Ώ','ά','έ','ή','ί','ό','ύ','ώ'
+    };
+
+    /**
+     * Most of these were just generated from the LinguisticSort enum:
+     *
+     * <pre><code>
+     *     public static void generateValuesFromLinguisticSort() {
+     *         for (LinguisticSort s : LinguisticSort.values()) {
+     *             System.out.println(String.format("%1$s(\"%2$s\", \"%3$s\"),",
+     *                 s.name(), s.getUpperSqlFormatString(), s.getLocale().getLanguage()));
+     *         }
+     *     }
+     * </code></pre>
+     *
+     * Each value is a PL/SQL upper case expression that may return different results than
+     * Java's String.toUpperCase method for the given language.
+     */
+    private enum UpperExpr {
+        ENGLISH("upper(%s)", "en"),
+        GERMAN("nls_upper(%s, 'nls_sort=xgerman')", "de"),
+        FRENCH("nls_upper(%s, 'nls_sort=xfrench')", "fr"),
+        ITALIAN("nls_upper(%s, 'nls_sort=italian')", "it"),
+        SPANISH("nls_upper(%s, 'nls_sort=spanish')", "es"),
+        CATALAN("nls_upper(%s, 'nls_sort=catalan')", "ca"),
+        DUTCH("nls_upper(%s, 'nls_sort=dutch')", "nl"),
+        PORTUGUESE("nls_upper(%s, 'nls_sort=west_european')", "pt"),
+        DANISH("nls_upper(%s, 'nls_sort=danish')", "da"),
+        NORWEGIAN("nls_upper(%s, 'nls_sort=norwegian')", "no"),
+        SWEDISH("nls_upper(%s, 'nls_sort=swedish')", "sv"),
+        FINNISH("nls_upper(%s, 'nls_sort=finnish')", "fi"),
+        CZECH("nls_upper(%s, 'nls_sort=xczech')", "cs"),
+        POLISH("nls_upper(%s, 'nls_sort=polish')", "pl"),
+        TURKISH("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "tr"),
+        CHINESE_HK("nls_upper(to_single_byte(%s), 'nls_sort=tchinese_radical_m')", "zh"),
+        CHINESE_TW("nls_upper(to_single_byte(%s), 'nls_sort=tchinese_radical_m')", "zh"),
+        CHINESE("nls_upper(to_single_byte(%s), 'nls_sort=schinese_radical_m')", "zh"),
+        JAPANESE("nls_upper(to_single_byte(%s), 'nls_sort=japanese_m')", "ja"),
+        KOREAN("nls_upper(to_single_byte(%s), 'nls_sort=korean_m')", "ko"),
+        RUSSIAN("nls_upper(%s, 'nls_sort=russian')", "ru"),
+        BULGARIAN("nls_upper(%s, 'nls_sort=bulgarian')", "bg"),
+        INDONESIAN("nls_upper(%s, 'nls_sort=indonesian')", "in"),
+        ROMANIAN("nls_upper(%s, 'nls_sort=romanian')", "ro"),
+        VIETNAMESE("nls_upper(%s, 'nls_sort=vietnamese')", "vi"),
+        UKRAINIAN("nls_upper(%s, 'nls_sort=ukrainian')", "uk"),
+        HUNGARIAN("nls_upper(%s, 'nls_sort=xhungarian')", "hu"),
+        GREEK("nls_upper(%s, 'nls_sort=greek')", "el"),
+        HEBREW("nls_upper(%s, 'nls_sort=hebrew')", "iw"),
+        SLOVAK("nls_upper(%s, 'nls_sort=slovak')", "sk"),
+        SERBIAN_CYRILLIC("nls_upper(%s, 'nls_sort=generic_m')", "sr"),
+        SERBIAN_LATIN("nls_upper(%s, 'nls_sort=xcroatian')", "sh"),
+        BOSNIAN("nls_upper(%s, 'nls_sort=xcroatian')", "bs"),
+        GEORGIAN("nls_upper(%s, 'nls_sort=binary')", "ka"),
+        BASQUE("nls_upper(%s, 'nls_sort=west_european')", "eu"),
+        MALTESE("nls_upper(%s, 'nls_sort=west_european')", "mt"),
+        ROMANSH("nls_upper(%s, 'nls_sort=west_european')", "rm"),
+        LUXEMBOURGISH("nls_upper(%s, 'nls_sort=west_european')", "lb"),
+        IRISH("nls_upper(%s, 'nls_sort=west_european')", "ga"),
+        SLOVENE("nls_upper(%s, 'nls_sort=xslovenian')", "sl"),
+        CROATIAN("nls_upper(%s, 'nls_sort=xcroatian')", "hr"),
+        MALAY("nls_upper(%s, 'nls_sort=malay')", "ms"),
+        ARABIC("nls_upper(%s, 'nls_sort=arabic')", "ar"),
+        ESTONIAN("nls_upper(%s, 'nls_sort=estonian')", "et"),
+        ICELANDIC("nls_upper(%s, 'nls_sort=icelandic')", "is"),
+        LATVIAN("nls_upper(%s, 'nls_sort=latvian')", "lv"),
+        LITHUANIAN("nls_upper(%s, 'nls_sort=lithuanian')", "lt"),
+        KYRGYZ("nls_upper(%s, 'nls_sort=binary')", "ky"),
+        KAZAKH("nls_upper(%s, 'nls_sort=binary')", "kk"),
+        TAJIK("nls_upper(%s, 'nls_sort=russian')", "tg"),
+        BELARUSIAN("nls_upper(%s, 'nls_sort=russian')", "be"),
+        TURKMEN("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "tk"),
+        AZERBAIJANI("nls_upper(translate(%s,'i','İ'), 'nls_sort=xturkish')", "az"),
+        ARMENIAN("nls_upper(%s, 'nls_sort=binary')", "hy"),
+        THAI("nls_upper(%s, 'nls_sort=thai_dictionary')", "th"),
+        HINDI("nls_upper(%s, 'nls_sort=binary')", "hi"),
+        URDU("nls_upper(%s, 'nls_sort=arabic')", "ur"),
+        BENGALI("nls_upper(%s, 'nls_sort=bengali')", "bn"),
+        TAMIL("nls_upper(%s, 'nls_sort=binary')", "ta"),
+        ESPERANTO("upper(%s)", "eo"),
+
+        // for formulas
+        XWEST_EUROPEAN("NLS_UPPER(%s,'NLS_SORT=xwest_european')", "en");
+
+
+        private final String expr;
+        private final Locale locale;
+
+        /**
+         * @param expr the PL/SQL expression with %s wildcards for the single string input.
+         * @param langCode ISO code for the language to use, as in
+         *                 {@code str.toUpperCase(new Locale(langCode))}.
+         */
+        private UpperExpr(String expr, String langCode) {
+            this.expr = expr;
+            this.locale = new Locale(langCode);
+        }
+
+        /**
+         * Builds the PL/SQL expression for one character by substituting
+         * {@code unistr('\XXXX')} into {@link #expr}, where {@code XXXX} is the value
+         * returned by {@code hexCodePoint(value)}.
+         */
+        private String getSql(char value) {
+            return String.format(expr, "unistr('\\" + hexCodePoint(value) + "')");
+        }
+
+        /** Upper-cases the single character as a String using this value's {@link #locale}. */
+        private String getJava(char value) {
+            return Character.toString(value).toUpperCase(locale);
+        }
+    }
+
+    /**
+     * This method generates some anonymous PL/SQL routines which, when run, will generate an
+     * OracleUpperTable value for each {@code UpperExpr}. Each table is created by comparing
+     * the result of {@link String#toUpperCase(Locale)} against a
+     * {@link UpperExpr#getSql(char) PL/SQL expression}. The table contains all deviations from
+     * Oracle for each character in a {@link #charsToTest given set} that we know are fussy.
+     *
+     * @param out the writer that receives the generated PL/SQL script; this method neither
+     *            flushes nor closes it explicitly.
+     */
+    public static void generateUpperCaseExceptions(PrintWriter out) {
+
+        out.println("set serveroutput on;");
+        out.println("set define off;"); // So we don't have to escape ampersands.
+        out.println("/");
+        out.println("BEGIN");
+
+        putLine(out, "/*");
+        putLine(out, " * Licensed to the Apache Software Foundation (ASF) under one or more");
+        putLine(out, " * contributor license agreements.  See the NOTICE file distributed with");
+        putLine(out, " * this work for additional information regarding copyright ownership.");
+        putLine(out, " * The ASF licenses this file to you under the Apache License, Version 2.0");
+        putLine(out, " * (the \"License\"); you may not use this file except in compliance with");
+        putLine(out, " * the License.  You may obtain a copy of the License at");
+        putLine(out, " *");
+        putLine(out, " * http://www.apache.org/licenses/LICENSE-2.0");
+        putLine(out, " *");
+        putLine(out, " * Unless required by applicable law or agreed to in writing, software");
+        putLine(out, " * distributed under the License is distributed on an \"AS IS\" BASIS,");
+        putLine(out, " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.");
+        putLine(out, " * See the License for the specific language governing permissions and");
+        putLine(out, " * limitations under the License.");
+        putLine(out, " */");
+
+        putLine(out, "package i18n;");
+        putLine(out, "");
+        putLine(out, "import java.util.Locale;");
+        putLine(out, "import edu.umd.cs.findbugs.annotations.NonNull;");
+        putLine(out, "");
+        putLine(out, "/**");
+        putLine(out, " * Generated by " + OracleUpperTableGeneratorTest.class.getCanonicalName());
+        putLine(out, " * <p>");
+        putLine(out, " * An instance of this enum codifies the difference between executing a " +
+                "{@link #getSqlFormatString() particular PL/SQL");
+        putLine(out, " * expression} in Oracle and executing {@link String#toUpperCase(Locale)} " +
+                "for a {@link #getLocale() particular locale}");
+        putLine(out, " * in Java. These differences (also called exceptions) are expressed by " +
+                "the output of {@link #getUpperCaseExceptions()}");
+        putLine(out, " * and {@link #getUpperCaseExceptionMapping(char)}.");
+        putLine(out, " * <p>");
+        putLine(out, " * The tables are generated by testing a particular set of characters " +
+                "that are known to contain exceptions and");
+        putLine(out, " * {@link #toUpperCase(String) may be used} to compensate for exceptions " +
+                "found and generate output in Java that will be");
+        putLine(out, " * consistent with Oracle for the given (sql expression, locale) pair " +
+                "over all tested values.");
+        putLine(out, " * <p>");
+        putLine(out, " * Characters tested:");
+        putLine(out, " * <ul>");
+        for (char c : charsToTest) {
+            putLine(out, " * <li>U+%1$s &#x%1$s</li>", hexCodePoint(c));
+        }
+        putLine(out, " * </ul>");
+        putLine(out, " *");
+        putLine(out, " * @see OracleUpper");
+        putLine(out, " */");
+        putLine(out, "public enum OracleUpperTable {");
+
+        for (UpperExpr u : UpperExpr.values()) {
+            // Opens the enum constant and its exception-character string literal; the
+            // literal is closed by the putLine call at the end of this iteration.
+            put(out, "    %s(\"%s\", \"%s\", \"", u.name(), u.expr, u.locale.getLanguage());
+
+            // Don't generate any exceptions for EO, it's a test value and
+            // I wanna use it as a baseline.
+            if (u != UpperExpr.ESPERANTO) {
+                for (char c : charsToTest) {
+                    String template = "IF %1$s <> '%2$s' THEN dbms_output.put(unistr('\\%3$s')); END IF;";
+                    out.println(String.format(template, u.getSql(c), u.getJava(c), hexCodePoint(c)));
+                }
+            }
+
+            putLine(out, "\"),");
+        }
+
+        putLine(out, "    ;");
+        putLine(out, "");
+        putLine(out, "    private final String sql;");
+        putLine(out, "    private final Locale locale;");
+        putLine(out, "    private final char[] exceptionChars;");
+        putLine(out, "");
+        putLine(out, "    private OracleUpperTable(String sql, String lang, " +
+                "String exceptionChars) {");
+        putLine(out, "        this.sql = sql;");
+        putLine(out, "        this.locale = new Locale(lang);");
+        putLine(out, "        this.exceptionChars = exceptionChars.toCharArray();");
+        putLine(out, "    }");
+        putLine(out, "");
+        putLine(out, "    /**");
+        putLine(out, "    * Return an array containing characters for which Java's " +
+                "String.toUpperCase method is known to");
+        putLine(out, "    * deviate from the result of Oracle evaluating {@link #getSql(String) " +
+                "this expression}.");
+        putLine(out, "    *");
+        putLine(out, "    * @return an array containing all exceptional characters.");
+        putLine(out, "    */");
+        putLine(out, "    final @NonNull char[] getUpperCaseExceptions() {");
+        putLine(out, "        return exceptionChars;");
+        putLine(out, "    }");
+        putLine(out, "");
+        putLine(out, "   /**");
+        putLine(out, "    * For a character, {@code exception}, contained in the String " +
+                "returned from");
+        putLine(out, "    * {@link #getUpperCaseExceptions()}, this method returns the " +
+                "anticipated result of upper-casing");
+        putLine(out, "    *  the character in Oracle when evaluating {@link #getSql(String) " +
+                "this expression}.");
+        putLine(out, "    *");
+        putLine(out, "    * @return the upper case of {@code exception}, according to what " +
+                "Oracle would do.");
+        putLine(out, "    * @throws IllegalArgumentException");
+        putLine(out, "    *             if the character is not contained in the String returned");
+        putLine(out, "    *             by {@link #getUpperCaseExceptions()}.");
+        putLine(out, "    */");
+        putLine(out, "    final String getUpperCaseExceptionMapping(char exception) {");
+
+        putLine(out, "        switch (exception) {");
+        for (char c : charsToTest){
+            putLine(out, "        case '%s':", "" + c);
+            putLine(out, "            switch (this) {");
+            for (UpperExpr u : UpperExpr.values()) {
+                if (u == UpperExpr.ESPERANTO) {
+                    continue;
+                }
+                String template = "IF %1$s <> '%2$s' THEN dbms_output.put_line('            " +
+                        "case %3$s: return ' || '\"' || %1$s || '\"; // %2$s'); END IF;";
+                out.println(String.format(template,
+                        u.getSql(c),
+                        u.getJava(c),
+                        u.name()));
+            }
+            putLine(out, "            default: // fall out");
+            putLine(out, "            }");
+            putLine(out, "            break;");
+        }
+        putLine(out, "        }");
+
+        putLine(out, "        throw new IllegalArgumentException(");
+        putLine(out, "                \"No upper case mapping for char=\" + exception");
+        putLine(out, "                + \" and this=\" + this);");
+        putLine(out, "    }");
+        putLine(out, "");
+
+        putLine(out, "    public final Locale getLocale() {");
+        putLine(out, "        return locale;");
+        putLine(out, "    }");
+        putLine(out, "");
+
+        putLine(out, "    public String getSqlFormatString() {");
+        putLine(out, "        return sql;");
+        putLine(out, "    }");
+        putLine(out, "");
+
+        putLine(out, "    public String getSql(String expr) {");
+        putLine(out, "        return String.format(sql, expr);");
+        putLine(out, "    }");
+        putLine(out, "");
+
+        putLine(out, "    public String toUpperCase(String value) {");
+        putLine(out, "        return OracleUpper.toUpperCase(this, value);");
+        putLine(out, "    }");
+        putLine(out, "");
+
+        putLine(out, "    public static final OracleUpperTable forLinguisticSort(String sort) {");
+        putLine(out, "        return Enum.valueOf(OracleUpperTable.class, sort);");
+        putLine(out, "    }");
+        putLine(out, "}");
+
+        out.println("END;");
+    }
+
+    /**
+     * Escapes a string for embedding in a single-quoted SQL literal by doubling every
+     * single quote (i.e. two single quotes in a row).
+     */
+    private static String sqlEscape(String str) {
+        final String quote = "'";
+        return str.replace(quote, quote + quote);
+    }
+
+    /**
+     * Returns the character's code unit as lower-case hexadecimal, left-padded with zeros
+     * to four digits.
+     */
+    private static String hexCodePoint(char c) {
+        final String hex = Integer.toHexString(c);
+        final StringBuilder padded = new StringBuilder();
+        for (int missing = 4 - hex.length(); missing > 0; missing--) {
+            padded.append('0');
+        }
+        return padded.append(hex).toString();
+    }
+
+    /**
+     * Writes to {@code out} a {@code dbms_output.put} call that will emit the result of
+     * {@link #format(String, String...) formatting} {@code str} with {@code args}.
+     *
+     * @param out the writer receiving the generated PL/SQL statement
+     * @param str a format string
+     * @param args optional format arguments.
+     */
+    private static void put(PrintWriter out, String str, String... args) {
+        final String payload = format(str, args);
+        out.println("dbms_output.put('" + payload + "');");
+    }
+
+    /**
+     * Writes to {@code out} a {@code dbms_output.put_line} call that will emit the result of
+     * {@link #format(String, String...) formatting} {@code str} with {@code args}.
+     *
+     * @param out the writer receiving the generated PL/SQL statement
+     * @param str a format string
+     * @param args optional format arguments.
+     */
+    private static void putLine(PrintWriter out, String str, String... args) {
+        final String payload = format(str, args);
+        out.println("dbms_output.put_line('" + payload + "');");
+    }
+
+    /**
+     *  Both {@code str} and {@code args} will be {@link #sqlEscape(String) sql escaped},
+     *  and then {@code str} will be {@link String#format(String, Object...) formatted}
+     *  using {@code args}. When {@code args} is {@code null} or empty, the escaped
+     *  {@code str} is returned as-is and is not interpreted as a format string.
+     *  The {@code args} array passed in is never modified.
+     *
+     * @param str a format string
+     * @param args optional format arguments
+     * @return the escaped and, if arguments were given, formatted string
+     */
+    private static String format(String str, String... args) {
+        String escaped = sqlEscape(str);
+        if (args == null || args.length == 0) {
+            return escaped;
+        }
+        // Escape into a fresh array: writing back into args would double-escape the values
+        // if a caller ever passed the same array twice.
+        Object[] escapedArgs = new Object[args.length];
+        for (int i = 0; i < args.length; i++) {
+            escapedArgs[i] = sqlEscape(args[i]);
+        }
+        return String.format(escaped, escapedArgs);
+    }
+
+    /**
+     * Stand-alone entry point: prints the generated PL/SQL script to standard output.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        PrintWriter out = new PrintWriter(System.out);
+        generateUpperCaseExceptions(out);
+        // A PrintWriter created from an OutputStream is buffered and does not auto-flush;
+        // without this flush the tail of the script is lost when the JVM exits.
+        out.flush();
+    }
+
+    /** Smoke test: runs the generator against an in-memory writer and ignores the output. */
+    public void testGenerateUpperCaseExceptions() {
+        // The script is not inspected; the test only fails if generation throws.
+        final StringWriter discarded = new StringWriter();
+        final PrintWriter sink = new PrintWriter(discarded);
+        generateUpperCaseExceptions(sink);
+    }
+}
diff --git a/pom.xml b/pom.xml
index 3cac0e074a..ef99d0de18 100644
--- a/pom.xml
+++ b/pom.xml
@@ -123,7 +123,7 @@
     <joni.version>2.1.31</joni.version>
     <omid.version>1.1.0</omid.version>
     <stream.version>2.9.5</stream.version>
-    <i18n-util.version>1.0.4</i18n-util.version>
+    <icu4j.version>72.1</icu4j.version>
     <guice.version>4.0</guice.version>
     <zookeeper.version>3.5.7</zookeeper.version>
     <curator.version>4.2.0</curator.version>
@@ -1428,9 +1428,14 @@
         <version>${stream.version}</version>
       </dependency>
       <dependency>
-        <groupId>com.salesforce.i18n</groupId>
-        <artifactId>i18n-util</artifactId>
-        <version>${i18n-util.version}</version>
+        <groupId>com.ibm.icu</groupId>
+        <artifactId>icu4j</artifactId>
+        <version>${icu4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.ibm.icu</groupId>
+        <artifactId>icu4j-localespi</artifactId>
+        <version>${icu4j.version}</version>
       </dependency>
       <dependency>
         <groupId>com.lmax</groupId>

[phoenix] 03/03: PHOENIX-6818 Remove dependency on the i18n-util library

Reply via email to