GitHub user myui commented on a diff in the pull request:

    https://github.com/apache/incubator-hivemall/pull/108#discussion_r138026120
  
    --- Diff: core/src/main/java/hivemall/tools/list/UDAFToOrderedList.java ---
    @@ -0,0 +1,535 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package hivemall.tools.list;
    +
    +import hivemall.utils.collections.BoundedPriorityQueue;
    +import hivemall.utils.hadoop.HiveUtils;
    +import hivemall.utils.lang.CommandLineUtils;
    +
    +import org.apache.commons.cli.CommandLine;
    +import org.apache.commons.cli.HelpFormatter;
    +import org.apache.commons.cli.Options;
    +import org.apache.hadoop.hive.ql.exec.Description;
    +import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
    +import org.apache.hadoop.hive.ql.metadata.HiveException;
    +import org.apache.hadoop.hive.ql.parse.SemanticException;
    +import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
    +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
    +import org.apache.hadoop.hive.serde2.objectinspector.*;
    +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    +import org.apache.hadoop.io.BooleanWritable;
    +import org.apache.hadoop.io.IntWritable;
    +
    +import javax.annotation.Nonnegative;
    +import javax.annotation.Nonnull;
    +import java.io.PrintWriter;
    +import java.io.StringWriter;
    +import java.util.*;
    +
    +/**
    + * Return list of values sorted by value itself or specific key.
    + */
    +@Description(
    +        name = "to_ordered_list",
    +        value = "_FUNC_(value [, key, const string options]) - Return list 
of values sorted by value itself or specific key")
    +public class UDAFToOrderedList extends AbstractGenericUDAFResolver {
    +    /**
    +     * Validates the argument list of {@code to_ordered_list} and returns the evaluator.
    +     *
    +     * <p>Accepted shapes, as checked below:
    +     * <ul>
    +     * <li>one argument, or two arguments where the second passes
    +     * {@code HiveUtils.isConstString}: the first argument must be of PRIMITIVE category;</li>
    +     * <li>two arguments, or three arguments where the third passes
    +     * {@code HiveUtils.isConstString}: the second argument must be of PRIMITIVE category
    +     * (the first argument is not type-checked in this branch);</li>
    +     * <li>anything else is rejected.</li>
    +     * </ul>
    +     *
    +     * @param info parameter information supplying the argument type infos and object inspectors
    +     * @return a new {@link UDAFToOrderedListEvaluator}
    +     * @throws SemanticException declared type; the checks below raise
    +     *         {@code UDFArgumentTypeException} for a non-primitive value or key, or for an
    +     *         unsupported argument count
    +     */
    +    @Override
    +    public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
    +            throws SemanticException {
    +        @SuppressWarnings("deprecation")
    +        TypeInfo[] typeInfo = info.getParameters();
    +        ObjectInspector[] argOIs = info.getParameterObjectInspectors();
    +        if ((typeInfo.length == 1) || (typeInfo.length == 2 && 
HiveUtils.isConstString(argOIs[1]))) {
    +            // value-only form (optionally followed by the option string): sorted by the value itself, so the value must be primitive
    +            if (typeInfo[0].getCategory() != 
ObjectInspector.Category.PRIMITIVE) {
    +                throw new UDFArgumentTypeException(0,
    +                    "Only primitive type arguments are accepted for value 
but "
    +                            + typeInfo[0].getTypeName() + " was passed as 
the first parameter.");
    +            }
    +        } else if ((typeInfo.length == 2)
    +                || (typeInfo.length == 3 && 
HiveUtils.isConstString(argOIs[2]))) {
    +            // key form (optionally followed by the option string): sorted by the second argument, so the key must be primitive
    +            if (typeInfo[1].getCategory() != 
ObjectInspector.Category.PRIMITIVE) {
    +                throw new UDFArgumentTypeException(1,
    +                    "Only primitive type arguments are accepted for key 
but "
    +                            + typeInfo[1].getTypeName() + " was passed as 
the second parameter.");
    +            }
    +        } else {
    +            throw new UDFArgumentTypeException(typeInfo.length - 1, // reported against the last supplied argument index
    +                "Number of arguments must be in [1, 3] including constant 
string for options: "
    +                        + typeInfo.length);
    +        }
    +        return new UDAFToOrderedListEvaluator();
    +    }
    +
    +    public static class UDAFToOrderedListEvaluator extends 
GenericUDAFEvaluator {
    +
    +        private ObjectInspector valueOI;
    +        private PrimitiveObjectInspector keyOI;
    +
    +        private ListObjectInspector valueListOI;
    +        private ListObjectInspector keyListOI;
    +
    +        private StructObjectInspector internalMergeOI;
    +
    +        private StructField valueListField;
    +        private StructField keyListField;
    +        private StructField sizeField;
    +        private StructField reverseOrderField;
    +
    +        @Nonnegative
    +        private int size;
    +        private boolean reverseOrder;
    +        private boolean sortByKey;
    +        /**
    +         * Builds the command-line options recognized in the constant option string.
    +         *
    +         * <p>Non-final, so a subclass may override it; {@code parseOptions} calls this
    +         * method and then registers {@code -help} on the returned instance.
    +         *
    +         * @return a new {@link Options} holding {@code -k} (takes an argument) and the
    +         *         {@code -reverse} / {@code --reverse_order} flag (takes no argument)
    +         */
    +        protected Options getOptions() {
    +            Options opts = new Options();
    +            opts.addOption("k", true, "To top-k (positive) or tail-k 
(negative) ordered queue");
    +            opts.addOption("reverse", "reverse_order", false,
    +                "Sort values by key in a reverse (e.g., descending) order 
[default: false]");
    +            return opts;
    +        }
    +
    +        @Nonnull
    +        protected final CommandLine parseOptions(String optionValue) 
throws UDFArgumentException {
    +            String[] args = optionValue.split("\\s+");
    +            Options opts = getOptions();
    +            opts.addOption("help", false, "Show function help");
    +            CommandLine cl = CommandLineUtils.parseOptions(args, opts);
    +
    +            if (cl.hasOption("help")) {
    +                Description funcDesc = 
getClass().getAnnotation(Description.class);
    +                final String cmdLineSyntax;
    +                if (funcDesc == null) {
    +                    cmdLineSyntax = getClass().getSimpleName();
    +                } else {
    +                    String funcName = funcDesc.name();
    +                    cmdLineSyntax = funcName == null ? 
getClass().getSimpleName()
    +                            : funcDesc.value().replace("_FUNC_", 
funcDesc.name());
    +                }
    +                StringWriter sw = new StringWriter();
    +                sw.write('\n');
    +                PrintWriter pw = new PrintWriter(sw);
    +                HelpFormatter formatter = new HelpFormatter();
    +                formatter.printHelp(pw, HelpFormatter.DEFAULT_WIDTH, 
cmdLineSyntax, null, opts,
    +                    HelpFormatter.DEFAULT_LEFT_PAD, 
HelpFormatter.DEFAULT_DESC_PAD, null, true);
    +                pw.flush();
    +                String helpMsg = sw.toString();
    +                throw new UDFArgumentException(helpMsg);
    +            }
    +
    +            return cl;
    +        }
    +
    +        protected CommandLine processOptions(ObjectInspector[] argOIs) 
throws UDFArgumentException {
    +            CommandLine cl = null;
    +
    +            int optionIndex = 1;
    +            if (sortByKey) {
    +                optionIndex = 2;
    +            }
    +
    +            int k = 0;
    +            boolean reverseOrder = false;
    +
    +            if (argOIs.length >= optionIndex + 1) {
    +                String rawArgs = 
HiveUtils.getConstString(argOIs[optionIndex]);
    +                cl = parseOptions(rawArgs);
    +
    +                reverseOrder = cl.hasOption("reverse_order");
    +
    +                if (cl.hasOption("k")) {
    +                    k = Integer.parseInt(cl.getOptionValue("k"));
    +                    if (k == 0) {
    +                        throw new UDFArgumentException("`k` must be 
nonzero: " + k);
    +                    }
    +                }
    +            }
    +
    +            this.size = Math.abs(k);
    +
    +            if ((k > 0 && reverseOrder) || (k < 0 && !reverseOrder) || (k 
== 0 && !reverseOrder)) {
    +                // reverse top-k, natural tail-k = ascending = natural 
order output = reverse order priority queue
    +                this.reverseOrder = true;
    +            } else { // (k > 0 && !reverseOrder) || (k < 0 && 
reverseOrder) || (k == 0 && reverseOrder)
    +                // natural top-k or reverse tail-k = descending = reverse 
order output = natural order priority queue
    +                this.reverseOrder = false;
    --- End diff --
    
    Why does the case `k == 0 && reverseOrder` result in `this.reverseOrder = false`?


---

Reply via email to