Repository: phoenix Updated Branches: refs/heads/4.x-HBase-1.2 107d2fb45 -> 970a184e2
PHOENIX-4418 - add locale-awareness to UPPER() and LOWER() functions Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/970a184e Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/970a184e Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/970a184e Branch: refs/heads/4.x-HBase-1.2 Commit: 970a184e2d6ecbc051bda15b08475af6a2fbe67d Parents: 107d2fb Author: Shehzaad <snakh...@salesforce.com> Authored: Wed Jan 31 02:21:21 2018 -0800 Committer: Thomas D'Silva <tdsi...@apache.org> Committed: Sat Mar 10 13:47:50 2018 -0800 ---------------------------------------------------------------------- .../apache/phoenix/end2end/IndexExtendedIT.java | 6 +- .../org/apache/phoenix/end2end/IndexToolIT.java | 6 +- .../function/CollationKeyFunction.java | 18 +--- .../expression/function/LowerFunction.java | 28 +++++- .../expression/function/ScalarFunction.java | 15 +++- .../expression/function/UpperFunction.java | 29 +++++- .../expression/function/LowerFunctionTest.java | 93 ++++++++++++++++++++ .../expression/function/UpperFunctionTest.java | 92 +++++++++++++++++++ 8 files changed, 259 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexExtendedIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexExtendedIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexExtendedIT.java index 3858825..624f3e3 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexExtendedIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexExtendedIT.java @@ -131,7 +131,7 @@ public class IndexExtendedIT extends BaseTest { IndexToolIT.upsertRow(stmt1, id++); conn.commit(); - stmt.execute(String.format("CREATE " + (localIndex ? "LOCAL" : "") + " INDEX %s ON %s (UPPER(NAME)) ASYNC ", indexTableName,dataTableFullName)); + stmt.execute(String.format("CREATE " + (localIndex ? "LOCAL" : "") + " INDEX %s ON %s (UPPER(NAME, 'en_US')) ASYNC ", indexTableName,dataTableFullName)); //update a row stmt1.setInt(1, 1); @@ -141,13 +141,13 @@ public class IndexExtendedIT extends BaseTest { conn.commit(); //verify rows are fetched from data table. - String selectSql = String.format("SELECT ID FROM %s WHERE UPPER(NAME) ='UNAME2'",dataTableFullName); + String selectSql = String.format("SELECT ID FROM %s WHERE UPPER(NAME, 'en_US') ='UNAME2'",dataTableFullName); ResultSet rs = conn.createStatement().executeQuery("EXPLAIN " + selectSql); String actualExplainPlan = QueryUtil.getExplainPlan(rs); //assert we are pulling from data table. assertEquals(String.format("CLIENT PARALLEL 1-WAY FULL SCAN OVER %s\n" + - " SERVER FILTER BY UPPER(NAME) = 'UNAME2'",dataTableFullName),actualExplainPlan); + " SERVER FILTER BY UPPER(NAME, 'en_US') = 'UNAME2'",dataTableFullName),actualExplainPlan); rs = stmt1.executeQuery(selectSql); assertTrue(rs.next()); http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java index a9128ab..afb6d72 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java @@ -163,14 +163,14 @@ public class IndexToolIT extends BaseTest { String stmtString2 = String.format( - "CREATE %s INDEX %s ON %s (LPAD(UPPER(NAME),8,'x')||'_xyz') ASYNC ", + "CREATE %s INDEX %s ON %s (LPAD(UPPER(NAME, 'en_US'),8,'x')||'_xyz') ASYNC ", (localIndex ? "LOCAL" : ""), indexTableName, dataTableFullName); conn.createStatement().execute(stmtString2); // verify rows are fetched from data table. String selectSql = String.format( - "SELECT ID FROM %s WHERE LPAD(UPPER(NAME),8,'x')||'_xyz' = 'xxUNAME2_xyz'", + "SELECT ID FROM %s WHERE LPAD(UPPER(NAME, 'en_US'),8,'x')||'_xyz' = 'xxUNAME2_xyz'", dataTableFullName); ResultSet rs = conn.createStatement().executeQuery("EXPLAIN " + selectSql); String actualExplainPlan = QueryUtil.getExplainPlan(rs); @@ -178,7 +178,7 @@ public class IndexToolIT extends BaseTest { // assert we are pulling from data table. assertEquals(String.format( "CLIENT PARALLEL 1-WAY FULL SCAN OVER %s\n" - + " SERVER FILTER BY (LPAD(UPPER(NAME), 8, 'x') || '_xyz') = 'xxUNAME2_xyz'", + + " SERVER FILTER BY (LPAD(UPPER(NAME, 'en_US'), 8, 'x') || '_xyz') = 'xxUNAME2_xyz'", dataTableFullName), actualExplainPlan); rs = stmt1.executeQuery(selectSql); http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java index 6adfb7e..cd1ef24 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java @@ -190,20 +190,4 @@ public class CollationKeyFunction extends ScalarFunction { // TODO: Look into calling freeze() on them to be able return true here. return false; } - - private <T> T getLiteralValue(int childIndex, Class<T> type) { - Expression expression = getChildren().get(childIndex); - if (LOG.isDebugEnabled()) { - LOG.debug("child: " + childIndex + ", expression: " + expression); - } - // It's safe to assume expression is a LiteralExpression since - // only arguments marked as isConstant = true should be handled through - // this method. - return type.cast(((LiteralExpression) expression).getValue()); - } - - @Override - public boolean isNullable() { - return getChildren().get(0).isNullable(); - } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java index 8b79fe5..8d468b3 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LowerFunction.java @@ -18,8 +18,11 @@ package org.apache.phoenix.expression.function; +import java.io.DataInput; +import java.io.IOException; import java.sql.SQLException; import java.util.List; +import java.util.Locale; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.phoenix.expression.Expression; @@ -28,16 +31,35 @@ import org.apache.phoenix.schema.tuple.Tuple; import org.apache.phoenix.schema.types.PDataType; import org.apache.phoenix.schema.types.PVarchar; +import com.force.i18n.LocaleUtils; + @FunctionParseNode.BuiltInFunction(name=LowerFunction.NAME, args={ - @FunctionParseNode.Argument(allowedTypes={PVarchar.class})} ) + @FunctionParseNode.Argument(allowedTypes={PVarchar.class}), + @FunctionParseNode.Argument(allowedTypes={PVarchar.class}, defaultValue="null", isConstant=true)} ) public class LowerFunction extends ScalarFunction { public static final String NAME = "LOWER"; + private Locale locale = null; + public LowerFunction() { } public LowerFunction(List<Expression> children) throws SQLException { super(children); + initialize(); + } + + private void initialize() { + if(children.size() > 1) { + String localeISOCode = getLiteralValue(1, String.class); + locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode); + } + } + + @Override + public void readFields(DataInput input) throws IOException { + super.readFields(input); + initialize(); } @Override @@ -55,7 +77,9 @@ public class LowerFunction extends ScalarFunction { return true; } - ptr.set(PVarchar.INSTANCE.toBytes(sourceStr.toLowerCase())); + String resultStr = locale == null ? sourceStr.toLowerCase() : sourceStr.toLowerCase(locale); + + ptr.set(PVarchar.INSTANCE.toBytes(resultStr)); return true; } http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ScalarFunction.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ScalarFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ScalarFunction.java index 2a5fe44..30adbd7 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ScalarFunction.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ScalarFunction.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.phoenix.compile.KeyPart; import org.apache.phoenix.expression.Expression; +import org.apache.phoenix.expression.LiteralExpression; import org.apache.phoenix.expression.visitor.ExpressionVisitor; import org.apache.phoenix.util.ByteUtil; @@ -51,7 +52,19 @@ public abstract class ScalarFunction extends FunctionExpression { byte[] key = ByteUtil.copyKeyBytesIfNecessary(ptr); return key; } - + + /** + * Retrieve the literal value at childIndex. The argument must be a constant + * (i.e. marked as isConstant=true) + */ + protected final <T> T getLiteralValue(int childIndex, Class<T> type) { + Expression expression = getChildren().get(childIndex); + // It's safe to assume expression is a LiteralExpression since + // only arguments marked as isConstant = true should be handled through + // this method. + return type.cast(((LiteralExpression) expression).getValue()); + } + @Override public <T> T accept(ExpressionVisitor<T> visitor) { List<T> l = acceptChildren(visitor, visitor.visitEnter(this)); http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java index 3a6305c..c8e7096 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/UpperFunction.java @@ -18,8 +18,11 @@ package org.apache.phoenix.expression.function; +import java.io.DataInput; +import java.io.IOException; import java.sql.SQLException; import java.util.List; +import java.util.Locale; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -27,18 +30,38 @@ import org.apache.phoenix.expression.Expression; import org.apache.phoenix.parse.FunctionParseNode; import org.apache.phoenix.schema.types.PDataType; import org.apache.phoenix.schema.types.PVarchar; + +import com.force.i18n.LocaleUtils; + import org.apache.phoenix.schema.tuple.Tuple; @FunctionParseNode.BuiltInFunction(name=UpperFunction.NAME, args={ - @FunctionParseNode.Argument(allowedTypes={PVarchar.class})} ) + @FunctionParseNode.Argument(allowedTypes={PVarchar.class}), + @FunctionParseNode.Argument(allowedTypes={PVarchar.class}, defaultValue="null", isConstant=true)} ) public class UpperFunction extends ScalarFunction { public static final String NAME = "UPPER"; + private Locale locale; + public UpperFunction() { } public UpperFunction(List<Expression> children) throws SQLException { super(children); + initialize(); + } + + private void initialize() { + if(children.size() > 1) { + String localeISOCode = getLiteralValue(1, String.class); + locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode); + } + } + + @Override + public void readFields(DataInput input) throws IOException { + super.readFields(input); + initialize(); } @Override @@ -52,7 +75,9 @@ public class UpperFunction extends ScalarFunction { return true; } - ptr.set(PVarchar.INSTANCE.toBytes(sourceStr.toUpperCase())); + String resultStr = locale == null ? sourceStr.toUpperCase() : sourceStr.toUpperCase(locale); + + ptr.set(PVarchar.INSTANCE.toBytes(resultStr)); return true; } http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/test/java/org/apache/phoenix/expression/function/LowerFunctionTest.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/function/LowerFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/LowerFunctionTest.java new file mode 100644 index 0000000..f1c119f --- /dev/null +++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/LowerFunctionTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.expression.function; + +import static org.junit.Assert.assertEquals; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.phoenix.expression.Expression; +import org.apache.phoenix.expression.LiteralExpression; +import org.apache.phoenix.schema.SortOrder; +import org.apache.phoenix.schema.types.PVarchar; +import org.junit.Test; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; + +/** + * "Unit" tests for LowerFunction + * + */ +public class LowerFunctionTest { + + + // These maps were obtained from Java API docs for java.lang.String + // https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#toLowerCase-java.util.Locale- + private static ImmutableMap<String, String> turkishLowerToUpperCaseMap = + ImmutableMap.of("\u0130", "\u0069", + "\u0049", "\u0131"); + + private static ImmutableMap<String, String> anyLocaleLowerToUpperCaseMap = + ImmutableMap.of( "\u0399\u03a7\u0398\u03a5\u03a3", "\u03b9\u03c7\u03b8\u03c5\u03c2", + // IXÎÏΣ -> ιÏÎ¸Ï Ï (the last character is the "lunate sigma") + "FrEnCh Fries", "french fries"); + + @Test + public void testTurkishUpperCase() throws Exception { + testLowerToUpperCaseMap(turkishLowerToUpperCaseMap, "tr"); + } + + @Test + public void testUniversalUpperCaseNoLocale() throws Exception { + testLowerToUpperCaseMap(anyLocaleLowerToUpperCaseMap, null); + } + + @Test + public void testUniversalUpperCaseTurkish() throws Exception { + testLowerToUpperCaseMap(anyLocaleLowerToUpperCaseMap, "tr"); + } + + private void testLowerToUpperCaseMap(Map<String, String> lowerToUpperMap, String locale) throws Exception { + for(Map.Entry<String, String> lowerUpperPair: lowerToUpperMap.entrySet()) { + String upperCaseResultAsc = callFunction(lowerUpperPair.getKey(), locale, SortOrder.ASC); + String upperCaseResultDesc = callFunction(lowerUpperPair.getKey(), locale, SortOrder.DESC); + + assertEquals("Result of calling LowerFunction[ASC] on [" + lowerUpperPair.getKey() + "][" + locale + "] not as expected.", + lowerUpperPair.getValue(), upperCaseResultAsc); + assertEquals("Result of calling LowerFunction[DESC] on [" + lowerUpperPair.getKey() + "][" + locale + "] not as expected.", + lowerUpperPair.getValue(), upperCaseResultDesc); + } + } + + private static String callFunction(String inputStr, String localeIsoCode, SortOrder sortOrder) throws Exception { + LiteralExpression inputStrLiteral, localeIsoCodeLiteral; + inputStrLiteral = LiteralExpression.newConstant(inputStr, PVarchar.INSTANCE, sortOrder); + localeIsoCodeLiteral = LiteralExpression.newConstant(localeIsoCode, PVarchar.INSTANCE, sortOrder); + List<Expression> expressions = Lists.newArrayList((Expression) inputStrLiteral, + (Expression) localeIsoCodeLiteral); + Expression lowerFunction = new LowerFunction(expressions); + ImmutableBytesWritable ptr = new ImmutableBytesWritable(); + boolean ret = lowerFunction.evaluate(null, ptr); + String result = ret + ? (String) lowerFunction.getDataType().toObject(ptr, lowerFunction.getSortOrder()) : null; + return result; + } +} http://git-wip-us.apache.org/repos/asf/phoenix/blob/970a184e/phoenix-core/src/test/java/org/apache/phoenix/expression/function/UpperFunctionTest.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/function/UpperFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/UpperFunctionTest.java new file mode 100644 index 0000000..d3a57b3 --- /dev/null +++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/UpperFunctionTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.expression.function; + +import static org.junit.Assert.assertEquals; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.phoenix.expression.Expression; +import org.apache.phoenix.expression.LiteralExpression; +import org.apache.phoenix.schema.SortOrder; +import org.apache.phoenix.schema.types.PVarchar; +import org.junit.Test; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; + +/** + * "Unit" tests for UpperFunction + * + */ +public class UpperFunctionTest { + + + // These maps were obtained from Java API docs for java.lang.String + // https://docs.oracle.com/javase/8/docs/api/java/lang/String.html#toUpperCase-java.util.Locale- + private static ImmutableMap<String, String> turkishLowerToUpperCaseMap = + ImmutableMap.of("\u0069", "\u0130", + "\u0131", "\u0049"); + + private static ImmutableMap<String, String> anyLocaleLowerToUpperCaseMap = + ImmutableMap.of("\u00df", "\u0053\u0053", + "Fahrvergnügen", "FAHRVERGNÃGEN"); + + @Test + public void testTurkishUpperCase() throws Exception { + testLowerToUpperCaseMap(turkishLowerToUpperCaseMap, "tr"); + } + + @Test + public void testUniversalUpperCaseNoLocale() throws Exception { + testLowerToUpperCaseMap(anyLocaleLowerToUpperCaseMap, null); + } + + @Test + public void testUniversalUpperCaseTurkish() throws Exception { + testLowerToUpperCaseMap(anyLocaleLowerToUpperCaseMap, "tr"); + } + + private void testLowerToUpperCaseMap(Map<String, String> lowerToUpperMap, String locale) throws Exception { + for(Map.Entry<String, String> lowerUpperPair: lowerToUpperMap.entrySet()) { + String upperCaseResultAsc = callFunction(lowerUpperPair.getKey(), locale, SortOrder.ASC); + String upperCaseResultDesc = callFunction(lowerUpperPair.getKey(), locale, SortOrder.DESC); + + assertEquals("Result of calling UpperFunction[ASC] on [" + lowerUpperPair.getKey() + "][" + locale + "] not as expected.", + lowerUpperPair.getValue(), upperCaseResultAsc); + assertEquals("Result of calling UpperFunction[DESC] on [" + lowerUpperPair.getKey() + "][" + locale + "] not as expected.", + lowerUpperPair.getValue(), upperCaseResultDesc); + } + } + + private static String callFunction(String inputStr, String localeIsoCode, SortOrder sortOrder) throws Exception { + LiteralExpression inputStrLiteral, localeIsoCodeLiteral; + inputStrLiteral = LiteralExpression.newConstant(inputStr, PVarchar.INSTANCE, sortOrder); + localeIsoCodeLiteral = LiteralExpression.newConstant(localeIsoCode, PVarchar.INSTANCE, sortOrder); + List<Expression> expressions = Lists.newArrayList((Expression) inputStrLiteral, + (Expression) localeIsoCodeLiteral); + Expression upperFunction = new UpperFunction(expressions); + ImmutableBytesWritable ptr = new ImmutableBytesWritable(); + boolean ret = upperFunction.evaluate(null, ptr); + String result = ret + ? (String) upperFunction.getDataType().toObject(ptr, upperFunction.getSortOrder()) : null; + return result; + } +}