This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.generic;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.LongWritable;

/**
 * GenericUDAFPercentileCont.
 *
 * Continuous percentile: when the requested position falls between two ranked
 * values, the result is linearly interpolated between them. Aggregation state
 * is a value-to-count histogram plus the requested percentile list.
 */
@Description(name = "percentile_cont", value = "_FUNC_(input, pc) "
    + "- Returns the percentile of expr at pc (range: [0,1]).")
public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {

  private static final Comparator<LongWritable> LONG_COMPARATOR;
  private static final Comparator<DoubleWritable> DOUBLE_COMPARATOR;

  static {
    // The long comparator comes from the Hadoop shims layer; the double one is trivial.
    LONG_COMPARATOR = ShimLoader.getHadoopShims().getLongComparator();
    DOUBLE_COMPARATOR = new Comparator<DoubleWritable>() {
      @Override
      public int compare(DoubleWritable o1, DoubleWritable o2) {
        return o1.compareTo(o2);
      }
    };
  }

  /**
   * Picks an evaluator by the primitive category of the first argument:
   * integral (and VOID) types aggregate as long, floating/decimal types as double.
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 2) {
      throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 argument is expected.");
    }

    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
          + parameters[0].getTypeName() + " is passed.");
    }
    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case VOID:
      return new PercentileContLongEvaluator();
    case FLOAT:
    case DOUBLE:
    case DECIMAL:
      return new PercentileContDoubleEvaluator();
    case STRING:
    case TIMESTAMP:
    case VARCHAR:
    case CHAR:
    case BOOLEAN:
    case DATE:
    default:
      throw new UDFArgumentTypeException(0,
          "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed.");
    }
  }

  /**
   * A comparator to sort the entries in order - Long.
   */
  @SuppressWarnings("serial")
  public static class LongComparator
      implements Comparator<Map.Entry<LongWritable, LongWritable>>, Serializable {
    @Override
    public int compare(Map.Entry<LongWritable, LongWritable> o1,
        Map.Entry<LongWritable, LongWritable> o2) {
      return LONG_COMPARATOR.compare(o1.getKey(), o2.getKey());
    }
  }

  /**
   * A comparator to sort the entries in order - Double.
   */
  @SuppressWarnings("serial")
  public static class DoubleComparator
      implements Comparator<Map.Entry<DoubleWritable, LongWritable>>, Serializable {
    @Override
    public int compare(Map.Entry<DoubleWritable, LongWritable> o1,
        Map.Entry<DoubleWritable, LongWritable> o2) {
      return DOUBLE_COMPARATOR.compare(o1.getKey(), o2.getKey());
    }
  }

  /**
   * Strategy for turning a sorted (value, cumulative count) list plus a fractional
   * rank position into the final percentile value.
   */
  protected interface PercentileCalculator<U> {
    double getPercentile(List<Map.Entry<U, LongWritable>> entriesList, double position);
  }

  /**
   * An abstract class to hold the generic udf functions for calculating percentile.
   *
   * @param <T> the plain Java value type read from the input object inspector
   * @param <U> the Writable key type used in the internal histogram
   */
  public abstract static class PercentileContEvaluator<T, U> extends GenericUDAFEvaluator {
    PercentileCalculator<U> calc = getCalculator();

    /**
     * A state class to store intermediate aggregation results.
     */
    public class PercentileAgg extends AbstractAggregationBuffer {
      // Histogram of input values; during terminate() the counts are converted
      // to cumulative counts in place (see getTotal()).
      Map<U, LongWritable> counts;
      // The requested percentile(s); populated lazily from the second argument.
      List<DoubleWritable> percentiles;
    }

    // For PARTIAL1 and COMPLETE
    protected PrimitiveObjectInspector inputOI;
    MapObjectInspector countsOI;
    ListObjectInspector percentilesOI;

    // For PARTIAL1 and PARTIAL2
    protected transient Object[] partialResult;

    // FINAL and COMPLETE output
    protected DoubleWritable result;

    // PARTIAL2 and FINAL inputs
    protected transient StructObjectInspector soi;
    protected transient StructField countsField;
    protected transient StructField percentilesField;

    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      super.init(m, parameters);

      initInspectors(parameters);

      if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { // ...for partial result
        partialResult = new Object[2];

        ArrayList<ObjectInspector> foi = getPartialInspectors();

        ArrayList<String> fname = new ArrayList<String>();
        fname.add("counts");
        fname.add("percentiles");

        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      } else { // ...for final result
        result = new DoubleWritable(0);
        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
      }
    }

    /** Percentile strategy for this evaluator (continuous or discrete). */
    protected abstract PercentileCalculator<U> getCalculator();

    /** Object inspectors for the (counts, percentiles) partial-result struct. */
    protected abstract ArrayList<ObjectInspector> getPartialInspectors();

    /** Extracts the raw input value through the given primitive inspector. */
    protected abstract T getInput(Object object, PrimitiveObjectInspector inputOI);

    /** Wraps a raw value into its Writable histogram key. */
    protected abstract U wrapInput(T input);

    /** Deep-copies a Writable key before storing it (inputs may be reused buffers). */
    protected abstract U copyInput(U input);

    /** Sorts histogram entries by key, ascending. */
    protected abstract void sortEntries(List<Entry<U, LongWritable>> entriesList);

    protected void initInspectors(ObjectInspector[] parameters) {
      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { // ...for real input data
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else { // ...for partial result as input
        soi = (StructObjectInspector) parameters[0];

        countsField = soi.getStructFieldRef("counts");
        percentilesField = soi.getStructFieldRef("percentiles");

        countsOI = (MapObjectInspector) countsField.getFieldObjectInspector();
        percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector();
      }
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      return new PercentileAgg();
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      // NOTE(review): only the histogram is cleared here; percAgg.percentiles is
      // kept as-is — presumably because the percentile argument is constant per
      // query. Confirm before reusing buffers across different percentile values.
      PercentileAgg percAgg = (PercentileAgg) agg;
      if (percAgg.counts != null) {
        percAgg.counts.clear();
      }
    }

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      PercentileAgg percAgg = (PercentileAgg) agg;
      Double percentile = ((HiveDecimalWritable) parameters[1]).getHiveDecimal().doubleValue();

      // First row: validate and remember the requested percentile.
      if (percAgg.percentiles == null) {
        validatePercentile(percentile);
        percAgg.percentiles = new ArrayList<DoubleWritable>(1);
        percAgg.percentiles.add(new DoubleWritable(percentile));
      }

      // NULL input values are ignored (do not contribute to the histogram).
      if (parameters[0] == null) {
        return;
      }

      T input = getInput(parameters[0], inputOI);

      if (input != null) {
        increment(percAgg, wrapInput(input), 1);
      }
    }

    /** Adds {@code delta} occurrences of {@code key} to the histogram. */
    protected void increment(PercentileAgg buf, U key, long delta) {
      if (buf.counts == null) {
        buf.counts = new HashMap<U, LongWritable>();
      }
      LongWritable count = buf.counts.get(key);
      if (count == null) {
        // Copy the key: the caller may reuse the Writable it handed in.
        buf.counts.put(copyInput(key), new LongWritable(delta));
      } else {
        count.set(count.get() + delta);
      }
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }

      Object objCounts = soi.getStructFieldData(partial, countsField);
      Object objPercentiles = soi.getStructFieldData(partial, percentilesField);

      Map<U, LongWritable> counts = (Map<U, LongWritable>) countsOI.getMap(objCounts);
      List<DoubleWritable> percentiles =
          (List<DoubleWritable>) percentilesOI.getList(objPercentiles);

      if (counts == null || percentiles == null) {
        return;
      }

      PercentileAgg percAgg = (PercentileAgg) agg;

      if (percAgg.percentiles == null) {
        percAgg.percentiles = new ArrayList<DoubleWritable>(percentiles);
      }

      for (Map.Entry<U, LongWritable> e : counts.entrySet()) {
        increment(percAgg, e.getKey(), e.getValue().get());
      }
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      PercentileAgg percAgg = (PercentileAgg) agg;

      // No input data.
      if (percAgg.counts == null || percAgg.counts.size() == 0) {
        return null;
      }

      // Get all items into an array and sort them.
      Set<Map.Entry<U, LongWritable>> entries = percAgg.counts.entrySet();
      List<Map.Entry<U, LongWritable>> entriesList =
          new ArrayList<Map.Entry<U, LongWritable>>(entries);
      sortEntries(entriesList);

      // Accumulate the counts (converts per-value counts to cumulative counts).
      long total = getTotal(entriesList);

      // Initialize the result.
      if (result == null) {
        result = new DoubleWritable();
      }

      calculatePercentile(percAgg, entriesList, total);

      return result;
    }

    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      PercentileAgg percAgg = (PercentileAgg) agg;
      partialResult[0] = percAgg.counts;
      partialResult[1] = percAgg.percentiles;

      return partialResult;
    }

    /**
     * Rewrites each entry's count to the running (cumulative) count and returns
     * the grand total. NOTE: mutates the LongWritables inside the histogram.
     */
    protected long getTotal(List<Map.Entry<U, LongWritable>> entriesList) {
      long runningTotal = 0;
      for (int idx = 0; idx < entriesList.size(); idx++) {
        LongWritable count = entriesList.get(idx).getValue();
        runningTotal += count.get();
        count.set(runningTotal);
      }
      return runningTotal;
    }

    protected void validatePercentile(Double percentile) {
      if (percentile < 0.0 || percentile > 1.0) {
        throw new RuntimeException("Percentile value must be within the range of 0 to 1.");
      }
    }

    protected void calculatePercentile(PercentileAgg percAgg,
        List<Map.Entry<U, LongWritable>> entriesList, long total) {
      // maxPosition is the 1.0 percentile
      long maxPosition = total - 1;
      double position = maxPosition * percAgg.percentiles.get(0).get();
      result.set(calc.getPercentile(entriesList, position));
    }

  }

  /**
   * The evaluator for percentile computation based on long.
   */
  public static class PercentileContLongEvaluator
      extends PercentileContEvaluator<Long, LongWritable> {

    protected ArrayList<ObjectInspector> getPartialInspectors() {
      ArrayList<ObjectInspector> fields = new ArrayList<ObjectInspector>();

      fields.add(ObjectInspectorFactory.getStandardMapObjectInspector(
          PrimitiveObjectInspectorFactory.writableLongObjectInspector,
          PrimitiveObjectInspectorFactory.writableLongObjectInspector));
      fields.add(ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
      return fields;
    }

    protected Long getInput(Object parameter, PrimitiveObjectInspector inputOI) {
      return PrimitiveObjectInspectorUtils.getLong(parameter, inputOI);
    }

    protected LongWritable wrapInput(Long input) {
      return new LongWritable(input);
    }

    protected LongWritable copyInput(LongWritable input) {
      return new LongWritable(input.get());
    }

    protected void sortEntries(List<Entry<LongWritable, LongWritable>> entriesList) {
      Collections.sort(entriesList, new LongComparator());
    }

    @Override
    protected PercentileCalculator<LongWritable> getCalculator() {
      return new PercentileContLongCalculator();
    }
  }

  /**
   * The evaluator for percentile computation based on double.
   */
  public static class PercentileContDoubleEvaluator
      extends PercentileContEvaluator<Double, DoubleWritable> {
    @Override
    protected ArrayList<ObjectInspector> getPartialInspectors() {
      ArrayList<ObjectInspector> fields = new ArrayList<ObjectInspector>();

      fields.add(ObjectInspectorFactory.getStandardMapObjectInspector(
          PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
          PrimitiveObjectInspectorFactory.writableLongObjectInspector));
      fields.add(ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
      return fields;
    }

    @Override
    protected Double getInput(Object parameter, PrimitiveObjectInspector inputOI) {
      return PrimitiveObjectInspectorUtils.getDouble(parameter, inputOI);
    }

    @Override
    protected DoubleWritable wrapInput(Double input) {
      return new DoubleWritable(input);
    }

    protected DoubleWritable copyInput(DoubleWritable input) {
      return new DoubleWritable(input.get());
    }

    protected void sortEntries(List<Entry<DoubleWritable, LongWritable>> entriesList) {
      Collections.sort(entriesList, new DoubleComparator());
    }

    @Override
    protected PercentileCalculator<DoubleWritable> getCalculator() {
      return new PercentileContDoubleCalculator();
    }
  }

  /**
   * Continuous percentile calculator for long keys.
   */
  public static class PercentileContLongCalculator implements PercentileCalculator<LongWritable> {
    /**
     * Get the percentile value, interpolating linearly between the keys at the
     * floor and ceiling of {@code position} when it has a fractional part.
     */
    public double getPercentile(List<Entry<LongWritable, LongWritable>> entriesList,
        double position) {
      // We may need to do linear interpolation to get the exact percentile
      long lower = (long) Math.floor(position);
      long higher = (long) Math.ceil(position);

      // Linear search since this won't take much time from the total execution anyway
      // lower has the range of [0 .. total-1]
      // The first entry with accumulated count (lower+1) corresponds to the lower position.
      int i = 0;
      while (entriesList.get(i).getValue().get() < lower + 1) {
        i++;
      }

      long lowerKey = entriesList.get(i).getKey().get();
      if (higher == lower) {
        // no interpolation needed because position does not have a fraction
        return lowerKey;
      }

      if (entriesList.get(i).getValue().get() < higher + 1) {
        i++;
      }
      long higherKey = entriesList.get(i).getKey().get();

      if (higherKey == lowerKey) {
        // no interpolation needed because lower position and higher position has the same key
        return lowerKey;
      }

      // Linear interpolation to get the exact percentile
      return (higher - position) * lowerKey + (position - lower) * higherKey;
    }
  }

  /**
   * Continuous percentile calculator for double keys; mirrors the long variant.
   */
  public static class PercentileContDoubleCalculator
      implements PercentileCalculator<DoubleWritable> {

    public double getPercentile(List<Map.Entry<DoubleWritable, LongWritable>> entriesList,
        double position) {
      long lower = (long) Math.floor(position);
      long higher = (long) Math.ceil(position);

      int i = 0;
      while (entriesList.get(i).getValue().get() < lower + 1) {
        i++;
      }

      double lowerKey = entriesList.get(i).getKey().get();
      if (higher == lower) {
        return lowerKey;
      }

      if (entriesList.get(i).getValue().get() < higher + 1) {
        i++;
      }
      double higherKey = entriesList.get(i).getKey().get();

      if (higherKey == lowerKey) {
        return lowerKey;
      }

      return (higher - position) * lowerKey + (position - lower) * higherKey;
    }
  }
}
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.LongWritable; + +/** + * GenericUDAFPercentileDisc. 
+ */ +@Description(name = "percentile_disc", value = "_FUNC_(input, pc) - " + + "Returns the percentile of expr at pc (range: [0,1]) without interpolation.") +public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length != 2) { + throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 argument is expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case VOID: + return new PercentileDiscLongEvaluator(); + case FLOAT: + case DOUBLE: + case DECIMAL: + return new PercentileDiscDoubleEvaluator(); + case STRING: + case TIMESTAMP: + case VARCHAR: + case CHAR: + case BOOLEAN: + case DATE: + default: + throw new UDFArgumentTypeException(0, + "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + } + } + + /** + * The evaluator for discrete percentile computation based on long. + */ + public static class PercentileDiscLongEvaluator extends PercentileContLongEvaluator { + @Override + protected PercentileCalculator<LongWritable> getCalculator() { + return new PercentileDiscLongCalculator(); + } + } + + /** + * The evaluator for discrete percentile computation based on double. 
+ */ + public static class PercentileDiscDoubleEvaluator extends PercentileContDoubleEvaluator { + @Override + protected PercentileCalculator<DoubleWritable> getCalculator() { + return new PercentileDiscDoubleCalculator(); + } + } + + public static class PercentileDiscLongCalculator implements PercentileCalculator<LongWritable> { + public double getPercentile(List<Map.Entry<LongWritable, LongWritable>> entriesList, + double position) { + long lower = (long) Math.floor(position); + long higher = (long) Math.ceil(position); + + int i = 0; + while (entriesList.get(i).getValue().get() < lower + 1) { + i++; + } + + long lowerKey = entriesList.get(i).getKey().get(); + if (higher == lower) { + return lowerKey; + } + + if (entriesList.get(i).getValue().get() < higher + 1) { + i++; + } + return entriesList.get(i).getKey().get(); + } + } + + public static class PercentileDiscDoubleCalculator + implements PercentileCalculator<DoubleWritable> { + public double getPercentile(List<Map.Entry<DoubleWritable, LongWritable>> entriesList, + double position) { + long lower = (long) Math.floor(position); + long higher = (long) Math.ceil(position); + + int i = 0; + while (entriesList.get(i).getValue().get() < lower + 1) { + i++; + } + + double lowerKey = entriesList.get(i).getKey().get(); + if (higher == lower) { + return lowerKey; + } + + if (entriesList.get(i).getValue().get() < higher + 1) { + i++; + } + return entriesList.get(i).getKey().get(); + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java new file mode 100644 index 0000000..1a7aaca --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContDoubleEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContEvaluator.PercentileAgg; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongCalculator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class for GenericUDAFPercentileCont. 
+ */ +public class TestGenericUDAFPercentileCont { + PercentileContLongCalculator calc = new PercentileContLongCalculator(); + + // Long type tests + @Test + public void testNoInterpolation() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.5, 3); + } + + @Test + public void testInterpolateLower() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.49, 2.96); + } + + @Test + public void testInterpolateHigher() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.51, 3.04); + } + + @Test + public void testSingleItem50() throws Exception { + Long[] items = new Long[] {1L }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testSingleItem100() throws Exception { + Long[] items = new Long[] {1L }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_cont(.50) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostgresRefExample() throws Exception { + Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.5, 54); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_cont(.72) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostgresRefExample2() throws Exception { + Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.72, 76.64); + } + + // Double type tests + @Test + public void testDoubleNoInterpolation() throws Exception { + Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.5, 3); + } + + @Test + public void testDoubleInterpolateLower() throws Exception { + Double[] items = new 
Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.49, 2.96); + } + + @Test + public void testDoubleInterpolateHigher() throws Exception { + Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.51, 3.04); + } + + @Test + public void testDoubleSingleItem50() throws Exception { + Double[] items = new Double[] {1.0 }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testDoubleSingleItem100() throws Exception { + Double[] items = new Double[] {1.0 }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54.0), (35.0), (15.0), (15.0), (76.0), (87.0), + * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_cont(.50) within group + * (order by k) as perc from percentile_src; + */ + @Test + public void testDoublePostgresRefExample() throws Exception { + Double[] items = new Double[] {54.0, 35.0, 15.0, 15.0, 76.0, 87.0, 78.0 }; + checkPercentile(items, 0.5, 54); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54.5), (35.3), (15.7), (15.7), (76.8), (87.34), + * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_cont(.72) within group + * (order by k) as perc from percentile_src; + */ + @Test + public void testDoublePostgresRefExample2() throws Exception { + Double[] items = new Double[] {54.5, 35.3, 15.7, 15.7, 76.8, 87.34, 78.0 }; + checkPercentile(items, 0.72, 77.184); + } + + @SuppressWarnings({ "unchecked", "resource" }) + private void checkPercentile(Long[] items, double percentile, double expected) throws Exception { + PercentileContLongEvaluator eval = new GenericUDAFPercentileCont.PercentileContLongEvaluator(); + + PercentileAgg agg = new PercentileContLongEvaluator().new PercentileAgg(); + + agg.percentiles = new ArrayList<DoubleWritable>(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new LongWritable(items[i]), 1); + } + + DoubleWritable result = 
(DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } + + @SuppressWarnings({ "unchecked", "resource" }) + private void checkPercentile(Double[] items, double percentile, double expected) throws Exception { + PercentileContDoubleEvaluator eval = new GenericUDAFPercentileCont.PercentileContDoubleEvaluator(); + + PercentileAgg agg = new PercentileContLongEvaluator().new PercentileAgg(); + + agg.percentiles = new ArrayList<DoubleWritable>(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new DoubleWritable(items[i]), 1); + } + + DoubleWritable result = (DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java new file mode 100644 index 0000000..1a1b3b0 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContEvaluator.PercentileAgg; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc.PercentileDiscDoubleEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc.PercentileDiscLongCalculator; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class for GenericUDAFPercentileDisc. + */ +public class TestGenericUDAFPercentileDisc { + PercentileDiscLongCalculator calc = new PercentileDiscLongCalculator(); + + // Long type tests + @Test + public void testNoInterpolation() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.5, 3); + } + + @Test + public void testInterpolateLower() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.49, 3.0); + } + + @Test + public void testInterpolateHigher() throws Exception { + Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.51, 4.0); + } + + @Test + public void testSingleItem50() throws Exception { + Long[] items = new Long[] {1L }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testSingleItem100() throws Exception { + Long[] items = new Long[] {1L }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_disc(.50) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostgresRefExample() throws Exception { + Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.5, 54); + 
} + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_disc(.72) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostgresRefExample2() throws Exception { + Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.72, 78); + } + + // Long type tests + @Test + public void testDoubleNoInterpolation() throws Exception { + Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.5, 3); + } + + @Test + public void testDoubleInterpolateLower() throws Exception { + Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.49, 3.0); + } + + @Test + public void testDoubleInterpolateHigher() throws Exception { + Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 }; + checkPercentile(items, 0.51, 4.0); + } + + @Test + public void testDoubleSingleItem50() throws Exception { + Double[] items = new Double[] {1.0 }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testDoubleSingleItem100() throws Exception { + Double[] items = new Double[] {1.0 }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54.0), (35.0), (15.0), (15.0), (76.0), (87.0), + * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_disc(.50) within group + * (order by k) as perc from percentile_src; + */ + @Test + public void testDoublePostgresRefExample() throws Exception { + Double[] items = new Double[] {54.0, 35.0, 15.0, 15.0, 76.0, 87.0, 78.0 }; + checkPercentile(items, 0.5, 54); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54.5), (35.3), (15.7), (15.7), (76.8), (87.34), + * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_disc(.72) within group + * (order by k) as perc from percentile_src; + */ + @Test + public void testDoublePostgresRefExample2() throws Exception 
{ + Double[] items = new Double[] {54.5, 35.3, 15.7, 15.7, 76.8, 87.34, 78.0 }; + checkPercentile(items, 0.72, 78.0); + } + + private void checkPercentile(Long[] items, double percentile, double expected) throws Exception { + PercentileContLongEvaluator eval = new GenericUDAFPercentileDisc.PercentileDiscLongEvaluator(); + + PercentileAgg agg = new PercentileContLongEvaluator().new PercentileAgg(); + + agg.percentiles = new ArrayList<DoubleWritable>(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new LongWritable(items[i]), 1); + } + + DoubleWritable result = (DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } + + @SuppressWarnings({ "unchecked", "resource" }) + private void checkPercentile(Double[] items, double percentile, double expected) + throws Exception { + PercentileDiscDoubleEvaluator eval = + new GenericUDAFPercentileDisc.PercentileDiscDoubleEvaluator(); + + PercentileAgg agg = new PercentileDiscDoubleEvaluator().new PercentileAgg(); + + agg.percentiles = new ArrayList<DoubleWritable>(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new DoubleWritable(items[i]), 1); + } + + DoubleWritable result = (DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } +} diff --git a/ql/src/test/queries/clientpositive/udaf_percentile_cont.q b/ql/src/test/queries/clientpositive/udaf_percentile_cont.q new file mode 100644 index 0000000..6d788c1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udaf_percentile_cont.q @@ -0,0 +1,69 @@ +--! 
qt:dataset:src +DESCRIBE FUNCTION percentile_cont; +DESCRIBE FUNCTION EXTENDED percentile_cont; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +-- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +select percentile_cont(cast(key as bigint), 0.5) from src where false; diff --git a/ql/src/test/queries/clientpositive/udaf_percentile_disc.q b/ql/src/test/queries/clientpositive/udaf_percentile_disc.q new file mode 
100644 index 0000000..7ba703e --- /dev/null +++ b/ql/src/test/queries/clientpositive/udaf_percentile_disc.q @@ -0,0 +1,69 @@ +--! qt:dataset:src +DESCRIBE FUNCTION percentile_disc; +DESCRIBE FUNCTION EXTENDED percentile_disc; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +-- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +select percentile_disc(cast(key as bigint), 0.5) from src where false; diff --git 
a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 0b772f0..4e44753 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -194,6 +194,8 @@ parse_url_tuple percent_rank percentile percentile_approx +percentile_cont +percentile_disc pi pmod posexplode diff --git a/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out new file mode 100644 index 0000000..dda6ce5 --- /dev/null +++ b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out @@ -0,0 +1,421 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). 
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + 
percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) 
+FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 
NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL diff --git a/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out b/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out new file mode 100644 index 0000000..7400d0b --- /dev/null +++ b/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out @@ -0,0 +1,842 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). 
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 
489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + 
percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: 
query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 
NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +PREHOOK: query: DESCRIBE FUNCTION percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_disc +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 'percentile_disc' +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 
'percentile_disc' +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + 
percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: 
query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 
NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL diff --git a/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out new file mode 100644 index 0000000..75fa36c --- /dev/null +++ b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out @@ -0,0 +1,421 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_disc +POSTHOOK: type: DESCFUNCTION +percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. 
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +POSTHOOK: type: DESCFUNCTION +percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 
470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + 
percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL 
+19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL