Repository: hive
Updated Branches:
  refs/heads/master 6a8d7e4cd -> cdaf35674


HIVE-11937: Improve StatsOptimizer to deal with queries with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567

Branch: refs/heads/master
Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
Parents: 6a8d7e4
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Tue Sep 29 17:47:39 2015 -0700
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Tue Sep 29 17:47:39 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       |  46 ++++-
 .../clientpositive/metadata_only_queries.q      |  15 ++
 .../clientpositive/metadata_only_queries.q.out  | 158 +++++++++++++++++
 .../spark/metadata_only_queries.q.out           | 170 +++++++++++++++++++
 .../tez/metadata_only_queries.q.out             | 170 +++++++++++++++++++
 5 files changed, 552 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
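
In short: StatsOptimizer used to bail out whenever the SELECT above the final
group-by was not an identity projection. This patch relaxes that check so that
constant projections (e.g. '1' as one, 2 as two, or 3+4.0, which constant
folding reduces to 7.0) are accepted alongside the aggregate columns, while
anything that is neither a column reference nor a constant still disqualifies
the query. A rough, self-contained sketch of that classification rule, using
hypothetical stand-in types rather than Hive's real ExprNodeDesc classes:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Sketch of the relaxed check (hypothetical mini-model, not Hive's real
 * ExprNodeDesc classes): every projected expression must be either a plain
 * column reference or a literal; anything else still disqualifies the query
 * from the metadata-only rewrite.
 */
public class SelectClassifierSketch {

  // Simplified stand-ins for ExprNodeColumnDesc / ExprNodeConstantDesc.
  interface Expr {}
  static final class ColumnRef implements Expr {
    final String name;
    ColumnRef(String name) { this.name = name; }
  }
  static final class Constant implements Expr {
    final Object value;
    Constant(Object value) { this.value = value; }
  }
  static final class OtherExpr implements Expr {}  // e.g. an unfolded function call

  /**
   * Returns the positions of constant projections, or null when some
   * projection is neither a column nor a constant (mirroring the patch's
   * early "return null").
   */
  static Map<Integer, Object> classify(List<Expr> selectList) {
    Map<Integer, Object> posToConstant = new HashMap<>();
    for (int pos = 0; pos < selectList.size(); pos++) {
      Expr e = selectList.get(pos);
      if (e instanceof Constant) {
        posToConstant.put(pos, ((Constant) e).value);
      } else if (!(e instanceof ColumnRef)) {
        return null;  // probably an expression; fall back to normal execution
      }
    }
    return posToConstant;
  }

  public static void main(String[] args) {
    // select count(*), '1' as one, max(i), 3+4.0 as three ...  -- after constant
    // folding, the SELECT above the group-by sees only columns and literals.
    List<Expr> ok = Arrays.<Expr>asList(
        new ColumnRef("_col0"), new Constant("1"),
        new ColumnRef("_col1"), new Constant(7.0d));
    System.out.println(classify(ok));  // {1=1, 3=7.0}
    List<Expr> bad = Arrays.<Expr>asList(new ColumnRef("_col0"), new OtherExpr());
    System.out.println(classify(bad)); // null
  }
}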


http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5a21e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
           return null;
         }
         Operator<?> last = (Operator<?>) stack.get(5);
+        SelectOperator cselOp = null;
+        Map<Integer,Object> posToConstant = new HashMap<>();
         if (last instanceof SelectOperator) {
-          SelectOperator cselOp = (SelectOperator) last;
+          cselOp = (SelectOperator) last;
           if (!cselOp.isIdentitySelect()) {
-            return null;  // todo we can do further by providing operator to fetch task
+            for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
+              ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
+              if (desc instanceof ExprNodeConstantDesc) {
+                // We store the mapping from output position to constant value for later use.
+                posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
+              } else {
+                if (!(desc instanceof ExprNodeColumnDesc)) {
+                  // Probably an expression, can't handle that
+                  return null;
+                }
+              }
+            }
           }
           last = (Operator<?>) stack.get(6);
         }
@@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {
 
 
         List<List<Object>> allRows = new ArrayList<List<Object>>();
-        allRows.add(oneRow);
-
         List<String> colNames = new ArrayList<String>();
         List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-        for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
-          colNames.add(colInfo.getInternalName());
-          ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+        if (cselOp == null) {
+          allRows.add(oneRow);
+          for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
+            colNames.add(colInfo.getInternalName());
+            ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+          }
+        } else {
+          int aggrPos = 0;
+          List<Object> oneRowWithConstant = new ArrayList<>();
+          for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
+            if (posToConstant.containsKey(pos)) {
+              // This position is a constant.
+              oneRowWithConstant.add(posToConstant.get(pos));
+            } else {
+              // This position is an aggregation.
+              oneRowWithConstant.add(oneRow.get(aggrPos++));
+            }
+            ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
+            colNames.add(colInfo.getInternalName());
+            ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+          }
+          allRows.add(oneRowWithConstant);
         }
         StandardStructObjectInspector sOI = ObjectInspectorFactory.
             getStandardStructObjectInspector(colNames, ois);
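
The second hunk splices the recorded constants back into the single row derived
from table and column statistics: it walks the SELECT's output schema and fills
each position from either the constant map or the next aggregate value. A
minimal runnable sketch of that step (simplified types and a hypothetical class
name, not Hive's actual operator or ObjectInspector API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Standalone sketch of the row-splicing idea (simplified types, hypothetical
 * class name): constants are remembered by output position, the aggregates are
 * answered from statistics, and the final row is rebuilt by walking the output
 * schema and picking each position from whichever source owns it.
 */
public class ConstantSpliceSketch {

  /** Output positions that are literals, mapped to their values. */
  private final Map<Integer, Object> posToConstant = new HashMap<>();

  /** Record a literal found at a given position of the SELECT list. */
  public void recordConstant(int pos, Object value) {
    posToConstant.put(pos, value);
  }

  /**
   * Build the single result row: aggregate values (already derived from
   * table/column statistics) fill the non-constant positions in order.
   */
  public List<Object> buildRow(int numOutputCols, List<Object> aggregateRow) {
    List<Object> row = new ArrayList<>(numOutputCols);
    int aggrPos = 0;
    for (int pos = 0; pos < numOutputCols; pos++) {
      if (posToConstant.containsKey(pos)) {
        row.add(posToConstant.get(pos));      // constant column
      } else {
        row.add(aggregateRow.get(aggrPos++)); // next aggregate from stats
      }
    }
    return row;
  }

  public static void main(String[] args) {
    // Mirrors the shape of: select min(i), '1' as one, max(i), 3+4.0 as three from stats_tbl
    ConstantSpliceSketch sketch = new ConstantSpliceSketch();
    sketch.recordConstant(1, "1");   // '1' as one
    sketch.recordConstant(3, 7.0d);  // 3+4.0, already folded to 7.0 by the planner
    List<Object> aggregatesFromStats = Arrays.<Object>asList(65536, 65791); // min(i), max(i)
    System.out.println(sketch.buildRow(4, aggregatesFromStats));
    // prints: [65536, 1, 65791, 7.0]
  }
}

This interleaving is what produces result rows such as 65536  1  65791 ... 7.0
in the updated q.out files below, with the aggregate values coming straight
from the metastore statistics rather than from a MapReduce job.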

http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index 56f3a78..70fac92 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
 explain
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
 
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+
 analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -69,6 +74,12 @@ explain
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
 
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+
+
+
 explain 
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
@@ -76,6 +87,10 @@ explain
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
 select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
 
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+
 explain select count(ts) from stats_tbl_part;
 
 drop table stats_tbl;

http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out 
b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 2dcd437..65a4dfa 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -276,6 +276,114 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_tbl
+            Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: s (type: string), bo (type: boolean), bin (type: 
binary), si (type: smallint), i (type: int), b (type: bigint)
+              outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+              Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9
+          Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+            Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_tbl_part
+            Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: s (type: string), bo (type: boolean), bin (type: 
binary), si (type: smallint), i (type: int), b (type: bigint)
+              outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+              Statistics: Num rows: 9489 Data size: 978785 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9
+          Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+            Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: analyze table stats_tbl compute statistics for columns 
t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
@@ -364,6 +472,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain 
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), 
count(si) from stats_tbl_part
 PREHOOK: type: QUERY
@@ -414,6 +547,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl_part
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain select count(ts) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(ts) from stats_tbl_part

http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out 
b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index b2221fc..0d85f4e 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: stats_tbl
+                  Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: s (type: string), bo (type: boolean), bin 
(type: binary), si (type: smallint), i (type: int), b (type: bigint)
+                    outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                      Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: stats_tbl_part
+                  Statistics: Num rows: 9489 Data size: 978785 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: s (type: string), bo (type: boolean), bin 
(type: binary), si (type: smallint), i (type: int), b (type: bigint)
+                    outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 9489 Data size: 978785 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                      Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: analyze table stats_tbl compute statistics for columns 
t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain 
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), 
count(si) from stats_tbl_part
 PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl_part
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain select count(ts) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(ts) from stats_tbl_part

http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out 
b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index f43440e..ab86ab0 100644
--- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: stats_tbl
+                  Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: s (type: string), bo (type: boolean), bin 
(type: binary), si (type: smallint), i (type: int), b (type: bigint)
+                    outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 9999 Data size: 1030908 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                      Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 
3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from 
stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: stats_tbl_part
+                  Statistics: Num rows: 9489 Data size: 978785 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: s (type: string), bo (type: boolean), bin 
(type: binary), si (type: smallint), i (type: int), b (type: bigint)
+                    outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 9489 Data size: 978785 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(), sum(1), sum(0.2), count(1), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                      Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), 
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: 
int), _col9 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), '1' (type: string), _col1 
(type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), 
_col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 76 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: analyze table stats_tbl compute statistics for columns 
t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain 
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), 
count(si) from stats_tbl_part
 PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl_part
 #### A masked pattern was here ####
 65536  65791   4294967296      4294967551      0.01    99.98   0.01    50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as 
three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), 
max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536  1       65791   4294967296      4294967551      0.01    99.98   7.0     
0.01    50.0
 PREHOOK: query: explain select count(ts) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(ts) from stats_tbl_part
