Repository: hive
Updated Branches:
  refs/heads/master 6a8d7e4cd -> cdaf35674
HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567

Branch: refs/heads/master
Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
Parents: 6a8d7e4
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Tue Sep 29 17:47:39 2015 -0700
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Tue Sep 29 17:47:39 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       |  46 ++++-
 .../clientpositive/metadata_only_queries.q      |  15 ++
 .../clientpositive/metadata_only_queries.q.out  | 158 +++++++++++++++++
 .../spark/metadata_only_queries.q.out           | 170 +++++++++++++++++++
 .../tez/metadata_only_queries.q.out             | 170 +++++++++++++++++++
 5 files changed, 552 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5a21e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
         return null;
       }
       Operator<?> last = (Operator<?>) stack.get(5);
+      SelectOperator cselOp = null;
+      Map<Integer,Object> posToConstant = new HashMap<>();
       if (last instanceof SelectOperator) {
-        SelectOperator cselOp = (SelectOperator) last;
+        cselOp = (SelectOperator) last;
         if (!cselOp.isIdentitySelect()) {
-          return null; // todo we can do further by providing operator to fetch task
+          for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
+            ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
+            if (desc instanceof ExprNodeConstantDesc) {
+              //We store the position to the constant value for later use.
+              posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
+            } else {
+              if (!(desc instanceof ExprNodeColumnDesc)) {
+                // Probably an expression, cant handle that
+                return null;
+              }
+            }
+          }
         }
         last = (Operator<?>) stack.get(6);
       }
@@ -588,13 +603,30 @@
       List<List<Object>> allRows = new ArrayList<List<Object>>();
-      allRows.add(oneRow);
-
       List<String> colNames = new ArrayList<String>();
       List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-      for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
-        colNames.add(colInfo.getInternalName());
-        ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+      if (cselOp == null) {
+        allRows.add(oneRow);
+        for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
+          colNames.add(colInfo.getInternalName());
+          ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+        }
+      } else {
+        int aggrPos = 0;
+        List<Object> oneRowWithConstant = new ArrayList<>();
+        for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
+          if (posToConstant.containsKey(pos)) {
+            // This position is a constant.
+            oneRowWithConstant.add(posToConstant.get(pos));
+          } else {
+            // This position is an aggregation.
+            oneRowWithConstant.add(oneRow.get(aggrPos++));
+          }
+          ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
+          colNames.add(colInfo.getInternalName());
+          ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+        }
+        allRows.add(oneRowWithConstant);
       }
       StandardStructObjectInspector sOI = ObjectInspectorFactory.
           getStandardStructObjectInspector(colNames, ois);


http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index 56f3a78..70fac92 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
 explain
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
 
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+
 analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -69,6 +74,12 @@ explain
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
 
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+
+
+
 explain
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo),
count(bin), count(si) from stats_tbl_part; select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part; @@ -76,6 +87,10 @@ explain select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part; select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part; +explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part; +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part; + explain select count(ts) from stats_tbl_part; drop table stats_tbl; http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 2dcd437..65a4dfa 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -276,6 +276,114 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_tbl + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_tbl_part + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl @@ -364,6 +472,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part PREHOOK: type: QUERY @@ -414,6 +547,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index b2221fc..0d85f4e 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -288,6 +288,126 @@ STAGE PLANS: Processor Tree: 
ListSink +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_tbl + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +POSTHOOK: type: QUERY 
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_tbl_part + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), 
max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part PREHOOK: type: QUERY @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index f43440e..ab86ab0 100644 --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -288,6 +288,126 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_tbl + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_tbl_part + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part PREHOOK: type: QUERY @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 +PREHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_tbl_part +#### A masked pattern was here #### +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part
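
For readers skimming the diff above: once StatsOptimizer decides that every aggregation in the query can be answered from metastore statistics, it replaces the whole job with a single fetched row, so constant select expressions such as '1' as one or the already-folded 3+4.0 have to be spliced back into that row at their original select-list positions. Below is a minimal, self-contained sketch of that position-mapping step, assuming a simplified setting outside Hive; ConstantColumnMergeSketch and buildResultRow are illustrative names only and are not Hive APIs.

// Minimal standalone sketch (not Hive code) of the constant-column row assembly
// idea used by this patch: constant positions are remembered up front, then the
// single stats-derived aggregation row is expanded so constants and aggregate
// values land in their original select-list positions.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ConstantColumnMergeSketch {

  // Expands the stats-derived aggregation row to the full select-list width by
  // putting each remembered constant back at its position and filling the rest
  // with the aggregate values, in order.
  static List<Object> buildResultRow(List<Object> aggregateRow,
      Map<Integer, Object> posToConstant, int width) {
    List<Object> row = new ArrayList<Object>(width);
    int aggrPos = 0;
    for (int pos = 0; pos < width; pos++) {
      if (posToConstant.containsKey(pos)) {
        row.add(posToConstant.get(pos));        // constant column: emit the literal
      } else {
        row.add(aggregateRow.get(aggrPos++));   // aggregation column: next stats value
      }
    }
    return row;
  }

  public static void main(String[] args) {
    // Mirrors: select count(*), '1' as one, max(i), 3+4.0 as three from stats_tbl
    // (3+4.0 is already folded to the constant 7.0 before StatsOptimizer runs).
    Map<Integer, Object> posToConstant = new HashMap<Integer, Object>();
    posToConstant.put(1, "1");
    posToConstant.put(3, 7.0d);
    // count(*) and max(i) as they would come back from metastore statistics.
    List<Object> aggregateRow = Arrays.<Object>asList(9999L, 65791);
    System.out.println(buildResultRow(aggregateRow, posToConstant, 4));
    // prints: [9999, 1, 65791, 7.0]
  }
}

In the patch itself the same bookkeeping is driven by the posToConstant map filled while inspecting the SelectOperator's column list, and the column names and ObjectInspectors for the fetch task are taken from that operator's schema rather than from the GroupBy operator's.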