Github user JamesRTaylor commented on a diff in the pull request: https://github.com/apache/phoenix/pull/275#discussion_r145230698 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java --- @@ -0,0 +1,221 @@ +package org.apache.phoenix.expression.function; + +import java.sql.SQLException; +import java.text.Collator; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + +import org.apache.commons.lang.BooleanUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.phoenix.expression.Expression; +import org.apache.phoenix.parse.FunctionParseNode; +import org.apache.phoenix.schema.tuple.Tuple; +import org.apache.phoenix.schema.types.PBoolean; +import org.apache.phoenix.schema.types.PDataType; +import org.apache.phoenix.schema.types.PInteger; +import org.apache.phoenix.schema.types.PIntegerArray; +import org.apache.phoenix.schema.types.PUnsignedIntArray; +import org.apache.phoenix.schema.types.PVarbinary; +import org.apache.phoenix.schema.types.PVarchar; +import org.apache.phoenix.schema.types.PhoenixArray; +import org.apache.phoenix.util.VarBinaryFormatter; + +import com.force.db.i18n.LinguisticSort; +import com.force.i18n.LocaleUtils; + +import com.ibm.icu.impl.jdkadapter.CollatorICU; +import com.ibm.icu.util.ULocale; + +/** + * A Phoenix Function that calculates a collation key for an input string based + * on a caller-provided locale and collator strength and decomposition settings. + * + * It uses the open-source grammaticus and i18n packages to obtain the collators + * it needs. 
+ * + * @author snakhoda + * + */ +@FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = { + // input string + @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }), + // ISO Code for Locale + @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true), + // whether to use special upper case collator + @FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false", isConstant = true), + // collator strength + @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true), + // collator decomposition + @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true) }) +public class CollationKeyFunction extends ScalarFunction { + + private static final Log LOG = LogFactory.getLog(CollationKeyFunction.class); + + public static final String NAME = "COLLKEY"; + + public CollationKeyFunction() { + } + + public CollationKeyFunction(List<Expression> children) throws SQLException { + super(children); + } + + @Override + public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) { + try { + String inputValue = getInputValue(tuple, ptr); --- End diff -- The evaluate method is called for every row during processing, so we want to have as little code here as possible. You can create a Collator member variable (an instance field) and move all the code that sets it up to an init() method. You'd call the init() method in the CollationKeyFunction(List<Expression> children) constructor and in an overridden readFields method like this (see InstrFunction for an example): @Override public void readFields(DataInput input) throws IOException { super.readFields(input); init(); }
---