[25/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

hashutosh Mon, 11 Dec 2017 16:00:17 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out 
b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
index 57eead0..14f1efe 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
@@ -111,7 +111,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -192,7 +193,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin_part
             Truncated Path -> Alias:
               /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -319,6 +320,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 58 Data size: 16921 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -355,6 +403,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result 
 select /*+mapjoin(b)*/ a.key, a.value, b.value 
@@ -484,7 +537,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -565,7 +619,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin_part
             Truncated Path -> Alias:
               /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -671,7 +725,7 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                           bucket_count -1
                           column.name.delimiter ,
                           columns key,value1,value2
@@ -692,6 +746,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 58 Data size: 16921 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -705,7 +806,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -728,6 +829,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result 
 select /*+mapjoin(a)*/ a.key, a.value, b.value 
@@ -874,7 +980,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -955,7 +1062,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin_part
             Truncated Path -> Alias:
               /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -1110,7 +1217,7 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                           bucket_count -1
                           column.name.delimiter ,
                           columns key,value1,value2
@@ -1131,6 +1238,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 61 Data size: 17884 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -1144,7 +1298,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -1167,6 +1321,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
 select /*+mapjoin(b)*/ a.key, a.value, b.value


http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out 
b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
index 184e890..161631a 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -216,7 +217,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin_part_2
             Truncated Path -> Alias:
               /srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -343,6 +344,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 58 Data size: 16921 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -379,6 +427,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result 
 select /*+mapjoin(b)*/ a.key, a.value, b.value 
@@ -508,7 +561,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -589,7 +643,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin_part_2
             Truncated Path -> Alias:
               /srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -695,7 +749,7 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                           bucket_count -1
                           column.name.delimiter ,
                           columns key,value1,value2
@@ -716,6 +770,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 58 Data size: 16921 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -729,7 +830,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -752,6 +853,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result 
 select /*+mapjoin(a)*/ a.key, a.value, b.value 

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out 
b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
index b353073..c07f722 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -214,7 +215,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin
             Truncated Path -> Alias:
               /srcbucket_mapjoin [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -339,6 +340,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 1 Data size: 206 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -375,6 +423,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
 select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -492,7 +545,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -571,7 +625,7 @@ STAGE PLANS:
                   name: default.srcbucket_mapjoin
             Truncated Path -> Alias:
               /srcbucket_mapjoin [a]
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -675,7 +729,7 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                           bucket_count -1
                           column.name.delimiter ,
                           columns key,value1,value2
@@ -696,6 +750,53 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 1 Data size: 206 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels 
true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-2
     Dependency Collection
@@ -709,7 +810,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -732,6 +833,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
 PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
 select /*+mapjoin(a)*/ a.key, a.value, b.value

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out 
b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
index b907c2d..ce24832 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
@@ -101,25 +101,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1800 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1800 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -127,14 +128,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15036 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -144,16 +145,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col4
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15540 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), concat(_col1, _col4) 
(type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -163,15 +164,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -191,6 +228,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value) 
@@ -281,25 +322,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 20 Data size: 3900 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 1900 Basic stats: 
COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 19 Data size: 3705 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 1900 Basic stats: 
COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 19 Data size: 3705 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 5580 Basic stats: 
COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 19 Data size: 3705 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 20 Data size: 5580 Basic stats: 
COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -307,14 +349,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15036 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -324,16 +366,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col4
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15540 Basic stats: 
COMPLETE Column stats: PARTIAL
                         Select Operator
                           expressions: _col0 (type: int), concat(_col1, _col4) 
(type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: PARTIAL
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: PARTIAL
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -343,15 +385,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: 
COMPLETE Column stats: PARTIAL
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: PARTIAL
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: PARTIAL
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -371,6 +449,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value) 
@@ -485,25 +567,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1800 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1800 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -511,14 +594,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 168 Data size: 31740 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 168 Data size: 15792 Basic stats: 
COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 160 Data size: 30228 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 168 Data size: 15792 Basic stats: 
COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 160 Data size: 30228 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 168 Data size: 46704 Basic stats: 
COMPLETE Column stats: PARTIAL
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -528,16 +611,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col4
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 176 Data size: 33250 Basic 
stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 168 Data size: 31080 Basic 
stats: COMPLETE Column stats: PARTIAL
                         Select Operator
                           expressions: _col0 (type: int), concat(_col1, _col4) 
(type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 176 Data size: 33250 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 168 Data size: 31584 Basic 
stats: COMPLETE Column stats: PARTIAL
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 176 Data size: 33250 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 168 Data size: 31584 Basic 
stats: COMPLETE Column stats: PARTIAL
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -547,15 +630,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 176 Data size: 33250 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 168 Data size: 31584 Basic stats: 
COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 176 Data size: 33250 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 168 Data size: 31584 Basic stats: 
COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 168 Data size: 45864 Basic stats: 
COMPLETE Column stats: PARTIAL
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: PARTIAL
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: PARTIAL
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -575,6 +694,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value) 
@@ -695,25 +818,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -721,14 +845,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -738,16 +862,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col3
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15540 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), concat(_col1, _col3) 
(type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -757,15 +881,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -785,6 +945,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value) 
@@ -887,25 +1051,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), concat(value, value) 
(type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1880 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -913,14 +1078,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), concat(value, value) 
(type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -930,16 +1095,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col3
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 31248 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), concat(_col1, _col3) 
(type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -949,15 +1114,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -977,6 +1178,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.v1, b.v2) 
@@ -1079,25 +1284,26 @@ STAGE PLANS:
       Edges:
         Map 2 <- Map 1 (BROADCAST_EDGE)
         Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 950 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1105,14 +1311,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1122,16 +1328,16 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col3
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15540 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: (_col0 + _col0) (type: int), 
concat(_col1, _col3) (type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 88 Data size: 16723 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 84 Data size: 15792 Basic 
stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1141,15 +1347,51 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: 
COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -1169,6 +1411,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key+a.key, concat(a.value, b.value)

[25/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

Reply via email to