Github user myui commented on a diff in the pull request: https://github.com/apache/incubator-hivemall/pull/108#discussion_r138026120 --- Diff: core/src/main/java/hivemall/tools/list/UDAFToOrderedList.java --- @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.tools.list; + +import hivemall.utils.collections.BoundedPriorityQueue; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.CommandLineUtils; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.IntWritable; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.*; + +/** + * Return list of values sorted by value itself or specific key. 
+ */ +@Description( + name = "to_ordered_list", + value = "_FUNC_(value [, key, const string options]) - Return list of values sorted by value itself or specific key") +public class UDAFToOrderedList extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) + throws SemanticException { + @SuppressWarnings("deprecation") + TypeInfo[] typeInfo = info.getParameters(); + ObjectInspector[] argOIs = info.getParameterObjectInspectors(); + if ((typeInfo.length == 1) || (typeInfo.length == 2 && HiveUtils.isConstString(argOIs[1]))) { + // sort values by value itself w/o key + if (typeInfo[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "Only primitive type arguments are accepted for value but " + + typeInfo[0].getTypeName() + " was passed as the first parameter."); + } + } else if ((typeInfo.length == 2) + || (typeInfo.length == 3 && HiveUtils.isConstString(argOIs[2]))) { + // sort values by key + if (typeInfo[1].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(1, + "Only primitive type arguments are accepted for key but " + + typeInfo[1].getTypeName() + " was passed as the second parameter."); + } + } else { + throw new UDFArgumentTypeException(typeInfo.length - 1, + "Number of arguments must be in [1, 3] including constant string for options: " + + typeInfo.length); + } + return new UDAFToOrderedListEvaluator(); + } + + public static class UDAFToOrderedListEvaluator extends GenericUDAFEvaluator { + + private ObjectInspector valueOI; + private PrimitiveObjectInspector keyOI; + + private ListObjectInspector valueListOI; + private ListObjectInspector keyListOI; + + private StructObjectInspector internalMergeOI; + + private StructField valueListField; + private StructField keyListField; + private StructField sizeField; + private StructField reverseOrderField; + + @Nonnegative + private int size; + private boolean 
reverseOrder; + private boolean sortByKey; + + protected Options getOptions() { + Options opts = new Options(); + opts.addOption("k", true, "To top-k (positive) or tail-k (negative) ordered queue"); + opts.addOption("reverse", "reverse_order", false, + "Sort values by key in a reverse (e.g., descending) order [default: false]"); + return opts; + } + + @Nonnull + protected final CommandLine parseOptions(String optionValue) throws UDFArgumentException { + String[] args = optionValue.split("\\s+"); + Options opts = getOptions(); + opts.addOption("help", false, "Show function help"); + CommandLine cl = CommandLineUtils.parseOptions(args, opts); + + if (cl.hasOption("help")) { + Description funcDesc = getClass().getAnnotation(Description.class); + final String cmdLineSyntax; + if (funcDesc == null) { + cmdLineSyntax = getClass().getSimpleName(); + } else { + String funcName = funcDesc.name(); + cmdLineSyntax = funcName == null ? getClass().getSimpleName() + : funcDesc.value().replace("_FUNC_", funcDesc.name()); + } + StringWriter sw = new StringWriter(); + sw.write('\n'); + PrintWriter pw = new PrintWriter(sw); + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp(pw, HelpFormatter.DEFAULT_WIDTH, cmdLineSyntax, null, opts, + HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, null, true); + pw.flush(); + String helpMsg = sw.toString(); + throw new UDFArgumentException(helpMsg); + } + + return cl; + } + + protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumentException { + CommandLine cl = null; + + int optionIndex = 1; + if (sortByKey) { + optionIndex = 2; + } + + int k = 0; + boolean reverseOrder = false; + + if (argOIs.length >= optionIndex + 1) { + String rawArgs = HiveUtils.getConstString(argOIs[optionIndex]); + cl = parseOptions(rawArgs); + + reverseOrder = cl.hasOption("reverse_order"); + + if (cl.hasOption("k")) { + k = Integer.parseInt(cl.getOptionValue("k")); + if (k == 0) { + throw new 
UDFArgumentException("`k` must be nonzero: " + k); + } + } + } + + this.size = Math.abs(k); + + if ((k > 0 && reverseOrder) || (k < 0 && !reverseOrder) || (k == 0 && !reverseOrder)) { + // reverse top-k, natural tail-k = ascending = natural order output = reverse order priority queue + this.reverseOrder = true; + } else { // (k > 0 && !reverseOrder) || (k < 0 && reverseOrder) || (k == 0 && reverseOrder) + // natural top-k or reverse tail-k = descending = reverse order output = natural order priority queue + this.reverseOrder = false; --- End diff -- Why does the case `k == 0 && reverseOrder` (the `reverse_order` option is given) result in `this.reverseOrder = false`?
---