Repository: hive Updated Branches: refs/heads/master 723f2d369 -> aa61697b5
HIVE-12301: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix test failure for udf_percentile.q (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aa61697b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aa61697b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aa61697b Branch: refs/heads/master Commit: aa61697b589de11e0c2722bd6b3cf7b19fd7a5f7 Parents: 723f2d3 Author: Pengcheng Xiong <pxi...@apache.org> Authored: Wed Dec 2 23:24:47 2015 +0800 Committer: Pengcheng Xiong <pxi...@apache.org> Committed: Wed Dec 2 23:24:47 2015 +0800 ---------------------------------------------------------------------- .../calcite/translator/HiveGBOpConvUtil.java | 104 ++++- .../cbo_rp_groupby3_noskew_multi_distinct.q | 39 ++ .../clientpositive/cbo_rp_udf_percentile.q | 79 ++++ .../clientpositive/cbo_rp_udf_percentile2.q | 42 ++ .../cbo_rp_groupby3_noskew_multi_distinct.q.out | 142 ++++++ .../clientpositive/cbo_rp_udf_percentile.q.out | 450 +++++++++++++++++++ .../clientpositive/cbo_rp_udf_percentile2.q.out | 238 ++++++++++ 7 files changed, 1071 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index a129cf3..a6d809b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.AggregateCall; @@ -88,6 +89,8 @@ public class HiveGBOpConvUtil { private GenericUDAFEvaluator udafEvaluator; private final ArrayList<ExprNodeDesc> udafParams = new ArrayList<ExprNodeDesc>(); private List<Integer> udafParamsIndxInGBInfoDistExprs = new ArrayList<Integer>(); + // We store the position of the argument for the function in the input. + private List<Integer> argList; }; private static class GBInfo { @@ -231,6 +234,7 @@ public class HiveGBOpConvUtil { inputOpAf.tabAlias); udafAttrs.udafParams.addAll(argExps); udafAttrs.udafName = aggCall.getAggregation().getName(); + udafAttrs.argList = aggCall.getArgList(); udafAttrs.isDistinctUDAF = aggCall.isDistinct(); List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList()); List<Integer> distColIndicesOfUDAF = new ArrayList<Integer>(); @@ -247,7 +251,7 @@ public class HiveGBOpConvUtil { // TODO: this seems wrong (following what Hive Regular does) if (!distParamInRefsToOutputPos.containsKey(argLst.get(i)) && !deDupedNonDistIrefsSet.contains(argLst.get(i))) { - deDupedNonDistIrefsSet.add(i); + deDupedNonDistIrefsSet.add(argLst.get(i)); gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i)); } } @@ -992,10 +996,17 @@ public class HiveGBOpConvUtil { .get(rs.getConf().getOutputKeyColumnNames().size() - 1); } int numDistinctUDFs = 0; - int distinctStartPosInReduceKeys = gbKeys.size(); List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols(); ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); int udafColStartPosInOriginalGB = gbInfo.gbKeys.size(); + // the positions in rsColInfoLst are as follows + // --grpkey--,--distkey--,--values-- + // but distUDAF may be before/after some non-distUDAF, + // i.e., their positions can be mixed. + // so we first process distUDAF and then non-distUDAF. + // But we need to remember the sequence of udafs. + List<Integer> distinctPositions = new ArrayList<>(); + Map<Integer, ArrayList<ExprNodeDesc>> indexToParameter = new TreeMap<>(); for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); @@ -1003,40 +1014,77 @@ public class HiveGBOpConvUtil { ColumnInfo rsUDAFParamColInfo; ExprNodeDesc udafParam; ExprNodeDesc constantPropDistinctUDAFParam; - for (int j = 0; j < udafAttr.udafParams.size(); j++) { - rsUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j); - String rsUDAFParamName = rsUDAFParamColInfo.getInternalName(); - // TODO: verify if this is needed - if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) { - rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" - + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j); - } - udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, - rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol()); - constantPropDistinctUDAFParam = SemanticAnalyzer - .isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), - reduceValues); - if (constantPropDistinctUDAFParam != null) { - udafParam = constantPropDistinctUDAFParam; + if (udafAttr.isDistinctUDAF) { + // udafAttr.udafParamsIndxInGBInfoDistExprs is not quite useful + // because distinctUDAF can also include group by key as an argument. + for (int j = 0; j < udafAttr.argList.size(); j++) { + int argPos = udafAttr.argList.get(j); + if (argPos < gbInfo.gbKeys.size() + distinctPositions.size()) { + // distinctUDAF includes group by key as an argument or reuses distinct keys. + rsUDAFParamColInfo = rsColInfoLst.get(argPos); + } else { + rsUDAFParamColInfo = rsColInfoLst.get(gbInfo.gbKeys.size() + distinctPositions.size()); + distinctPositions.add(argPos); + } + String rsDistUDAFParamName = rsUDAFParamColInfo.getInternalName(); + // TODO: verify if this is needed + if (lastReduceKeyColName != null) { + rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j); + } + + udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsDistUDAFParamName, + rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol()); + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), + reduceValues); + if (constantPropDistinctUDAFParam != null) { + udafParam = constantPropDistinctUDAFParam; + } + aggParameters.add(udafParam); } - aggParameters.add(udafParam); + indexToParameter.put(i, aggParameters); + numDistinctUDFs++; } + } + for (int i = 0; i < gbInfo.udafAttrs.size(); i++) { + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i); + ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); - if (udafAttr.isDistinctUDAF) { - numDistinctUDFs++; + ColumnInfo rsUDAFParamColInfo; + ExprNodeDesc udafParam; + ExprNodeDesc constantPropDistinctUDAFParam; + if (!udafAttr.isDistinctUDAF) { + for (int j = 0; j < udafAttr.udafParams.size(); j++) { + int argPos = udafAttr.argList.get(j); + rsUDAFParamColInfo = rsColInfoLst.get(argPos + getOffSet(distinctPositions, argPos)); + String rsUDAFParamName = rsUDAFParamColInfo.getInternalName(); + udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, + rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol()); + constantPropDistinctUDAFParam = SemanticAnalyzer + .isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), + reduceValues); + if (constantPropDistinctUDAFParam != null) { + udafParam = constantPropDistinctUDAFParam; + } + aggParameters.add(udafParam); + } + indexToParameter.put(i, aggParameters); } + } + for(int index : indexToParameter.keySet()){ + UDAFAttrs udafAttr = gbInfo.udafAttrs.get(index); Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF); GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, - aggParameters); + indexToParameter.get(index)); aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode)); if (useOriginalGBNames) { - colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i); + colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + index); } else { colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1); } - colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); outputColNames.add(colOutputName); } @@ -1049,6 +1097,16 @@ public class HiveGBOpConvUtil { return new OpAttr("", new HashSet<Integer>(), rsGB1); } + private static int getOffSet(List<Integer> distinctPositions, int pos) { + int ret = 0; + for (int distPos : distinctPositions) { + if (distPos > pos) { + ret++; + } + } + return ret; + } + @SuppressWarnings("unchecked") private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException { ArrayList<String> outputColNames = new ArrayList<String>(); http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/queries/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q b/ql/src/test/queries/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q new file mode 100644 index 0000000..ce79928 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q @@ -0,0 +1,39 @@ +set hive.cbo.returnpath.hiveop=true; +set hive.map.aggr=false; + +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)); + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)); + +SELECT dest1.* FROM dest1; + http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile.q b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile.q new file mode 100644 index 0000000..125bf0a --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile.q @@ -0,0 +1,79 @@ +set hive.cbo.returnpath.hiveop=true; + +DESCRIBE FUNCTION percentile; +DESCRIBE FUNCTION EXTENDED percentile; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +-- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile(NULL, 0.0), + percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +select percentile(cast(key as bigint), 0.5) from src where false; + +-- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false; http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile2.q b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile2.q new file mode 100644 index 0000000..6dd76a2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_udf_percentile2.q @@ -0,0 +1,42 @@ +set hive.cbo.returnpath.hiveop=true; + +DESCRIBE FUNCTION percentile; +DESCRIBE FUNCTION EXTENDED percentile; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + count(distinct(substr(value, 2))), + percentile(CAST(substr(value, 5) AS INT), 1.0), + count(distinct(CAST(key AS INT) DIV 10)), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out new file mode 100644 index 0000000..95233b0 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5) (type: string) + outputColumnNames: $f0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: $f0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + mode: complete + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double), $f9 (type: double), UDFToDouble($f10) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)), + sum(DISTINCT substr(src.value, 5)), + count(DISTINCT substr(src.value, 5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0 http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_udf_percentile.q.out b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile.q.out new file mode 100644 index 0000000..c699a95 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile.q.out @@ -0,0 +1,450 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: -- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: -- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile(NULL, 0.0), + percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile(NULL, 0.0), + percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL NULL +1 NULL NULL +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 NULL NULL +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 NULL NULL +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 NULL NULL +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL +PREHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 [1.0,1.0,1.0,1.0] +1 1.0 [1.0,1.0,1.0,1.0] +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 1.0 [1.0,1.0,1.0,1.0] +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 1.0 [1.0,1.0,1.0,1.0] +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 1.0 [1.0,1.0,1.0,1.0] +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL +PREHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +PREHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_udf_percentile2.q.out b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile2.q.out new file mode 100644 index 0000000..de6db7d --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_udf_percentile2.q.out @@ -0,0 +1,238 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: -- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 6 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 7 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +10 4 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 6 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 5 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 6 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 4 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 7 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 9 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 8 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 6 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 9 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +2 5 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 8 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 6 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 7 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 6 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 6 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 5 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 5 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 6 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 10 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 4 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +3 5 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +30 6 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 6 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 5 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 7 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 5 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 3 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 8 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 6 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 4 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 7 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +4 5 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 8 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 6 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 4 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 8 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 5 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 7 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 7 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 6 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 8 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 9 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +5 5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 5 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 6 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 7 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 6 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 6 0.0 6 4.5 9.0 [0.0,4.5,8.91,9.0] +1 7 10.0 7 15.0 19.0 [10.0,15.0,18.91,19.0] +10 4 100.0 4 103.0 105.0 [100.0,103.0,104.94,105.0] +11 6 111.0 6 117.0 119.0 [111.0,117.0,119.0,119.0] +12 5 120.0 5 127.0 129.0 [120.0,127.0,129.0,129.0] +13 6 131.0 6 137.0 138.0 [131.0,137.0,138.0,138.0] +14 4 143.0 4 146.0 149.0 [143.0,146.0,149.0,149.0] +15 7 150.0 7 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 9 160.0 9 166.5 169.0 [160.0,166.5,169.0,169.0] +17 8 170.0 8 175.0 179.0 [170.0,175.0,179.0,179.0] +18 6 180.0 6 186.5 189.0 [180.0,186.5,188.86,189.0] +19 9 190.0 9 194.5 199.0 [190.0,194.5,199.0,199.0] +2 5 20.0 5 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +20 8 200.0 8 205.0 209.0 [200.0,205.0,209.0,209.0] +21 6 213.0 6 216.5 219.0 [213.0,216.5,219.0,219.0] +22 7 221.0 7 224.0 229.0 [221.0,224.0,229.0,229.0] +23 6 230.0 6 234.0 239.0 [230.0,234.0,239.0,239.0] +24 6 241.0 6 244.0 249.0 [241.0,244.0,248.94,249.0] +25 5 252.0 5 256.0 258.0 [252.0,256.0,257.94,258.0] +26 5 260.0 5 264.0 266.0 [260.0,264.0,265.95,266.0] +27 6 272.0 6 275.0 278.0 [272.0,275.0,278.0,278.0] +28 10 280.0 10 283.5 289.0 [280.0,283.5,288.87,289.0] +29 4 291.0 4 297.0 298.0 [291.0,297.0,298.0,298.0] +3 5 30.0 5 35.0 37.0 [30.0,35.0,37.0,37.0] +30 6 302.0 6 307.0 309.0 [302.0,307.0,309.0,309.0] +31 6 310.0 6 316.0 318.0 [310.0,316.0,318.0,318.0] +32 5 321.0 5 324.0 327.0 [321.0,324.0,327.0,327.0] +33 7 331.0 7 333.0 339.0 [331.0,333.0,338.92,339.0] +34 5 341.0 5 345.0 348.0 [341.0,345.0,348.0,348.0] +35 3 351.0 3 353.0 356.0 [351.0,353.0,355.91,356.0] +36 8 360.0 8 367.0 369.0 [360.0,367.0,369.0,369.0] +37 6 373.0 6 376.0 379.0 [373.0,376.0,378.95,379.0] +38 4 382.0 4 384.0 389.0 [382.0,384.0,388.82,389.0] +39 7 392.0 7 396.0 399.0 [392.0,396.0,399.0,399.0] +4 5 41.0 5 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +40 8 400.0 8 403.5 409.0 [400.0,403.5,409.0,409.0] +41 6 411.0 6 415.5 419.0 [411.0,415.5,418.91,419.0] +42 4 421.0 4 425.5 429.0 [421.0,425.5,429.0,429.0] +43 8 430.0 8 435.0 439.0 [430.0,435.0,439.0,439.0] +44 5 443.0 5 446.0 449.0 [443.0,446.0,448.96,449.0] +45 7 452.0 7 455.0 459.0 [452.0,455.0,459.0,459.0] +46 7 460.0 7 467.5 469.0 [460.0,467.5,469.0,469.0] +47 6 470.0 6 477.0 479.0 [470.0,477.0,478.94,479.0] +48 8 480.0 8 484.0 489.0 [480.0,484.0,489.0,489.0] +49 9 490.0 9 494.5 498.0 [490.0,494.5,498.0,498.0] +5 5 51.0 5 54.0 58.0 [51.0,54.0,58.0,58.0] +6 5 64.0 5 66.5 69.0 [64.0,66.5,68.9,69.0] +7 6 70.0 6 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 7 80.0 7 84.0 87.0 [80.0,84.0,86.92,87.0] +9 6 90.0 6 95.0 98.0 [90.0,95.0,98.0,98.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + count(distinct(substr(value, 2))), + percentile(CAST(substr(value, 5) AS INT), 1.0), + count(distinct(CAST(key AS INT) DIV 10)), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + count(distinct(value)), + percentile(CAST(substr(value, 5) AS INT), 0.0), + count(distinct(substr(value, 5))), + percentile(CAST(substr(value, 5) AS INT), 0.5), + count(distinct(substr(value, 2))), + percentile(CAST(substr(value, 5) AS INT), 1.0), + count(distinct(CAST(key AS INT) DIV 10)), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 6 0.0 6 4.5 6 9.0 1 [0.0,4.5,8.91,9.0] +1 7 10.0 7 15.0 7 19.0 1 [10.0,15.0,18.91,19.0] +10 4 100.0 4 103.0 4 105.0 1 [100.0,103.0,104.94,105.0] +11 6 111.0 6 117.0 6 119.0 1 [111.0,117.0,119.0,119.0] +12 5 120.0 5 127.0 5 129.0 1 [120.0,127.0,129.0,129.0] +13 6 131.0 6 137.0 6 138.0 1 [131.0,137.0,138.0,138.0] +14 4 143.0 4 146.0 4 149.0 1 [143.0,146.0,149.0,149.0] +15 7 150.0 7 154.0 7 158.0 1 [150.0,154.0,157.92999999999998,158.0] +16 9 160.0 9 166.5 9 169.0 1 [160.0,166.5,169.0,169.0] +17 8 170.0 8 175.0 8 179.0 1 [170.0,175.0,179.0,179.0] +18 6 180.0 6 186.5 6 189.0 1 [180.0,186.5,188.86,189.0] +19 9 190.0 9 194.5 9 199.0 1 [190.0,194.5,199.0,199.0] +2 5 20.0 5 26.0 5 28.0 1 [20.0,26.0,27.939999999999998,28.0] +20 8 200.0 8 205.0 8 209.0 1 [200.0,205.0,209.0,209.0] +21 6 213.0 6 216.5 6 219.0 1 [213.0,216.5,219.0,219.0] +22 7 221.0 7 224.0 7 229.0 1 [221.0,224.0,229.0,229.0] +23 6 230.0 6 234.0 6 239.0 1 [230.0,234.0,239.0,239.0] +24 6 241.0 6 244.0 6 249.0 1 [241.0,244.0,248.94,249.0] +25 5 252.0 5 256.0 5 258.0 1 [252.0,256.0,257.94,258.0] +26 5 260.0 5 264.0 5 266.0 1 [260.0,264.0,265.95,266.0] +27 6 272.0 6 275.0 6 278.0 1 [272.0,275.0,278.0,278.0] +28 10 280.0 10 283.5 10 289.0 1 [280.0,283.5,288.87,289.0] +29 4 291.0 4 297.0 4 298.0 1 [291.0,297.0,298.0,298.0] +3 5 30.0 5 35.0 5 37.0 1 [30.0,35.0,37.0,37.0] +30 6 302.0 6 307.0 6 309.0 1 [302.0,307.0,309.0,309.0] +31 6 310.0 6 316.0 6 318.0 1 [310.0,316.0,318.0,318.0] +32 5 321.0 5 324.0 5 327.0 1 [321.0,324.0,327.0,327.0] +33 7 331.0 7 333.0 7 339.0 1 [331.0,333.0,338.92,339.0] +34 5 341.0 5 345.0 5 348.0 1 [341.0,345.0,348.0,348.0] +35 3 351.0 3 353.0 3 356.0 1 [351.0,353.0,355.91,356.0] +36 8 360.0 8 367.0 8 369.0 1 [360.0,367.0,369.0,369.0] +37 6 373.0 6 376.0 6 379.0 1 [373.0,376.0,378.95,379.0] +38 4 382.0 4 384.0 4 389.0 1 [382.0,384.0,388.82,389.0] +39 7 392.0 7 396.0 7 399.0 1 [392.0,396.0,399.0,399.0] +4 5 41.0 5 42.5 5 47.0 1 [41.0,42.5,46.849999999999994,47.0] +40 8 400.0 8 403.5 8 409.0 1 [400.0,403.5,409.0,409.0] +41 6 411.0 6 415.5 6 419.0 1 [411.0,415.5,418.91,419.0] +42 4 421.0 4 425.5 4 429.0 1 [421.0,425.5,429.0,429.0] +43 8 430.0 8 435.0 8 439.0 1 [430.0,435.0,439.0,439.0] +44 5 443.0 5 446.0 5 449.0 1 [443.0,446.0,448.96,449.0] +45 7 452.0 7 455.0 7 459.0 1 [452.0,455.0,459.0,459.0] +46 7 460.0 7 467.5 7 469.0 1 [460.0,467.5,469.0,469.0] +47 6 470.0 6 477.0 6 479.0 1 [470.0,477.0,478.94,479.0] +48 8 480.0 8 484.0 8 489.0 1 [480.0,484.0,489.0,489.0] +49 9 490.0 9 494.5 9 498.0 1 [490.0,494.5,498.0,498.0] +5 5 51.0 5 54.0 5 58.0 1 [51.0,54.0,58.0,58.0] +6 5 64.0 5 66.5 5 69.0 1 [64.0,66.5,68.9,69.0] +7 6 70.0 6 73.0 6 78.0 1 [70.0,73.0,77.91000000000001,78.0] +8 7 80.0 7 84.0 7 87.0 1 [80.0,84.0,86.92,87.0] +9 6 90.0 6 95.0 6 98.0 1 [90.0,95.0,98.0,98.0]