[45/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

hashutosh Mon, 11 Dec 2017 16:01:17 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out 
b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index 5f74166..a5d1724 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -145,7 +145,7 @@ STAGE PLANS:
             partition values:
               year 2000
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -190,7 +190,7 @@ STAGE PLANS:
             partition values:
               year 2001
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -235,7 +235,7 @@ STAGE PLANS:
             partition values:
               year 2002
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -280,7 +280,7 @@ STAGE PLANS:
             partition values:
               year 2003
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -322,12 +322,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d
-          Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain extended select state,locid from loc_orc_1d
@@ -348,7 +348,7 @@ STAGE PLANS:
             partition values:
               year 2000
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -393,7 +393,7 @@ STAGE PLANS:
             partition values:
               year 2001
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -438,7 +438,7 @@ STAGE PLANS:
             partition values:
               year 2002
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -483,7 +483,7 @@ STAGE PLANS:
             partition values:
               year 2003
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -525,12 +525,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d
-          Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute 
statistics for columns state
@@ -579,7 +579,7 @@ STAGE PLANS:
             partition values:
               year 2000
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -624,7 +624,7 @@ STAGE PLANS:
             partition values:
               year 2001
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -669,7 +669,7 @@ STAGE PLANS:
             partition values:
               year 2002
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -714,7 +714,7 @@ STAGE PLANS:
             partition values:
               year 2003
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -782,7 +782,7 @@ STAGE PLANS:
             partition values:
               year 2000
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -827,7 +827,7 @@ STAGE PLANS:
             partition values:
               year 2001
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -872,7 +872,7 @@ STAGE PLANS:
             partition values:
               year 2002
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -917,7 +917,7 @@ STAGE PLANS:
             partition values:
               year 2003
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid,zip
@@ -959,12 +959,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d
-          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: create table if not exists loc_orc_2d (
@@ -1068,7 +1068,7 @@ STAGE PLANS:
               year 2001
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1114,7 +1114,7 @@ STAGE PLANS:
               year 2002
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1160,7 +1160,7 @@ STAGE PLANS:
               year 2003
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1206,7 +1206,7 @@ STAGE PLANS:
               year 2000
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1298,7 +1298,7 @@ STAGE PLANS:
               year 2002
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1344,7 +1344,7 @@ STAGE PLANS:
               year 2003
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1390,7 +1390,7 @@ STAGE PLANS:
               year 2000
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1436,7 +1436,7 @@ STAGE PLANS:
               year 2001
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1528,7 +1528,7 @@ STAGE PLANS:
               year 2003
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1570,12 +1570,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_2d
-          Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain extended select state,locid from loc_orc_2d
@@ -1597,7 +1597,7 @@ STAGE PLANS:
               year 2001
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1643,7 +1643,7 @@ STAGE PLANS:
               year 2002
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1689,7 +1689,7 @@ STAGE PLANS:
               year 2003
               zip 43201
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1735,7 +1735,7 @@ STAGE PLANS:
               year 2000
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1827,7 +1827,7 @@ STAGE PLANS:
               year 2002
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1873,7 +1873,7 @@ STAGE PLANS:
               year 2003
               zip 94086
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1919,7 +1919,7 @@ STAGE PLANS:
               year 2000
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -1965,7 +1965,7 @@ STAGE PLANS:
               year 2001
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -2057,7 +2057,7 @@ STAGE PLANS:
               year 2003
               zip 94087
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns state,locid
@@ -2099,11 +2099,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_2d
-          Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink


http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out 
b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
index 9d00ce6..884bfcd 100644
--- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
+++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
@@ -38,22 +38,22 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: g
+            alias: f
             Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (value <> '') (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string)
+                expressions: key (type: int)
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
+                  key expressions: _col0 (type: int)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 0
                   auto parallelism: false
@@ -70,13 +70,13 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: int)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 1
-                  value expressions: _col0 (type: int)
+                  value expressions: _col1 (type: string)
                   auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -89,7 +89,7 @@ STAGE PLANS:
             partition values:
               ds 2008-04-08
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,value
@@ -129,16 +129,16 @@ STAGE PLANS:
               name: default.filter_join_breaktask
             name: default.filter_join_breaktask
       Truncated Path -> Alias:
-        /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g, $hdt$_1:m]
+        /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m]
       Needs Tagging: true
       Reduce Operator Tree:
         Join Operator
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col0 (type: string)
-            1 _col1 (type: string)
-          outputColumnNames: _col0, _col2
+            0 _col0 (type: int)
+            1 _col0 (type: int)
+          outputColumnNames: _col0, _col3
           Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
@@ -150,8 +150,8 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 properties:
                   column.name.delimiter ,
-                  columns _col0,_col2
-                  columns.types string,int
+                  columns _col0,_col3
+                  columns.types int,string
                   escape.delim \
                   serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -165,31 +165,31 @@ STAGE PLANS:
           TableScan
             GatherStats: false
             Reduce Output Operator
-              key expressions: _col2 (type: int)
+              key expressions: _col3 (type: string)
               null sort order: a
               sort order: +
-              Map-reduce partition columns: _col2 (type: int)
+              Map-reduce partition columns: _col3 (type: string)
               Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
               tag: 0
-              value expressions: _col0 (type: string)
+              value expressions: _col0 (type: int)
               auto parallelism: false
           TableScan
-            alias: f
+            alias: g
             Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: key is not null (type: boolean)
+              predicate: (value <> '') (type: boolean)
               Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: int)
+                expressions: value (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int)
+                  key expressions: _col0 (type: string)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 1
                   auto parallelism: false
@@ -203,8 +203,8 @@ STAGE PLANS:
             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             properties:
               column.name.delimiter ,
-              columns _col0,_col2
-              columns.types string,int
+              columns _col0,_col3
+              columns.types int,string
               escape.delim \
               serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -213,8 +213,8 @@ STAGE PLANS:
               output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
               properties:
                 column.name.delimiter ,
-                columns _col0,_col2
-                columns.types string,int
+                columns _col0,_col3
+                columns.types int,string
                 escape.delim \
                 serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -226,7 +226,7 @@ STAGE PLANS:
             partition values:
               ds 2008-04-08
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,value
@@ -266,7 +266,7 @@ STAGE PLANS:
               name: default.filter_join_breaktask
             name: default.filter_join_breaktask
       Truncated Path -> Alias:
-        /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f]
+        /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g]
 #### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -274,12 +274,12 @@ STAGE PLANS:
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col2 (type: int)
-            1 _col0 (type: int)
+            0 _col3 (type: string)
+            1 _col0 (type: string)
           outputColumnNames: _col0, _col5
           Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
-            expressions: _col5 (type: int), _col0 (type: string)
+            expressions: _col0 (type: int), _col5 (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby10.q.out 
b/ql/src/test/results/clientpositive/groupby10.q.out
index 9f90a2c..79e4fd2 100644
--- a/ql/src/test/results/clientpositive/groupby10.q.out
+++ b/ql/src/test/results/clientpositive/groupby10.q.out
@@ -44,11 +44,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -119,6 +123,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -133,12 +147,68 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: key (type: int), substr(value, 5) (type: string)
               sort order: ++
               Map-reduce partition columns: key (type: int)
@@ -157,7 +227,7 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -186,6 +256,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -197,9 +277,49 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM INPUT
 INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), 
count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
@@ -291,11 +411,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -366,6 +490,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -380,12 +514,68 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: key (type: int), substr(value, 5) (type: string)
               sort order: ++
               Map-reduce partition columns: key (type: int)
@@ -404,7 +594,7 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -433,6 +623,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -444,9 +644,49 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM INPUT
 INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), 
count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
@@ -537,9 +777,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -578,6 +820,16 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int)
+                outputColumnNames: key, val1, val2
+                Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           Group By Operator
             aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT 
KEY._col1:1._col0)
             keys: KEY._col0 (type: int)
@@ -596,6 +848,16 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest2
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int)
+                outputColumnNames: key, val1, val2
+                Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -610,6 +872,40 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, int, int
+          Table: default.dest2
 
   Stage: Stage-1
     Move Operator
@@ -621,9 +917,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-4
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM INPUT
 INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct 
substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby11.q.out 
b/ql/src/test/results/clientpositive/groupby11.q.out
index 2ab4c39..592be1d 100644
--- a/ql/src/test/results/clientpositive/groupby11.q.out
+++ b/ql/src/test/results/clientpositive/groupby11.q.out
@@ -32,11 +32,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -107,6 +111,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -123,12 +137,77 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, int, int
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              key expressions: '111' (type: string)
+              sort order: +
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: string), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          keys: '111' (type: string)
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '111' (type: string)
+              sort order: +
+              Map-reduce partition columns: '111' (type: string)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '111' (type: string)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '111' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, int, int
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: substr(value, 5) (type: string), key (type: 
string)
               sort order: ++
               Map-reduce partition columns: substr(value, 5) (type: string)
@@ -147,7 +226,7 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -176,6 +255,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: int)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -189,9 +278,58 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '111' (type: string)
+              sort order: +
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: string), val1 (type: int), val2 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          keys: '111' (type: string)
+          mode: partial1
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '111' (type: string)
+              sort order: +
+              Map-reduce partition columns: '111' (type: string)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '111' (type: string)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '111' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 partition(ds='111')

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby12.q.out 
b/ql/src/test/results/clientpositive/groupby12.q.out
index 2f633f0..cd0b7fb 100644
--- a/ql/src/test/results/clientpositive/groupby12.q.out
+++ b/ql/src/test/results/clientpositive/groupby12.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -54,6 +55,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -68,6 +79,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) 
GROUP BY src.key

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby1_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map.q.out 
b/ql/src/test/results/clientpositive/groupby1_map.q.out
index 337c2e0..82fe1f0 100644
--- a/ql/src/test/results/clientpositive/groupby1_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map.q.out
@@ -15,7 +15,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -59,6 +60,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: double)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -73,6 +89,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, double
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, 
sum(substr(src.value,5)) GROUP BY src.key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out 
b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
index 337c2e0..82fe1f0 100644
--- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
@@ -15,7 +15,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -59,6 +60,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: double)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -73,6 +89,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, double
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, 
sum(substr(src.value,5)) GROUP BY src.key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby1_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out 
b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out
index a140a02..44b4db7 100644
--- a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out
@@ -16,7 +16,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -84,6 +85,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: double)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -98,6 +114,32 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, double
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, 
sum(substr(src.value,5)) GROUP BY src.key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby1_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_noskew.q.out 
b/ql/src/test/results/clientpositive/groupby1_noskew.q.out
index 702ea3e..ba4366d 100644
--- a/ql/src/test/results/clientpositive/groupby1_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_noskew.q.out
@@ -15,7 +15,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -53,6 +54,16 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest_g1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: double)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -67,6 +78,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, double
+          Table: default.dest_g1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: key (type: int), value (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, 
sum(substr(src.value,5)) GROUP BY src.key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby2_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out 
b/ql/src/test/results/clientpositive/groupby2_map.q.out
index 427590d..262c5d8 100644
--- a/ql/src/test/results/clientpositive/groupby2_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -61,6 +62,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: string)
+              outputColumnNames: key, c1, c2
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(c1, 
'hll'), compute_stats(c2, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -75,6 +91,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, c1, c2
+          Column Types: string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT 
substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) 
GROUP BY substr(src.key,1,1)

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
index 0ab1985..581e958 100644
--- a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -61,6 +62,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: string), _col3 (type: int), _col4 (type: int)
+              outputColumnNames: key, c1, c2, c3, c4
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(c1, 
'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -75,6 +91,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, c1, c2, c3, c4
+          Column Types: string, int, string, int, int
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT 
substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), 
sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY 
substr(src.key,1,1)
@@ -120,7 +162,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -164,6 +207,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: string), _col3 (type: int), _col4 (type: int)
+              outputColumnNames: key, c1, c2, c3, c4
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(c1, 
'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -178,6 +236,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, c1, c2, c3, c4
+          Column Types: string, int, string, int, int
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT 
substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), 
sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY 
substr(src.key,1,1)

[45/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

Reply via email to