[18/24] hive git commit: HIVE-16996: Add HLL as an alternative to FM sketch to compute stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan, Prasanth Jayachandran)

pxiong Sat, 15 Jul 2017 01:07:00 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out 
b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
index 29b3373..1b12570 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
@@ -59,7 +59,7 @@ STAGE PLANS:
                 outputColumnNames: key, value, one, two, three
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: compute_stats(key, 16), compute_stats(value, 
16)
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
                   keys: one (type: string), two (type: string), three (type: 
string)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -69,7 +69,7 @@ STAGE PLANS:
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col4 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                    value expressions: _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col4 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)


http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out 
b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
index 9d24bc5..9e2121e 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
@@ -132,10 +132,10 @@ STAGE PLANS:
               value expressions: key (type: string), c1 (type: int), c2 (type: 
string)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0, 16), 
compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16)
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
           mode: partial1
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -149,17 +149,17 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+              Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
           mode: final
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out 
b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
index 681d962..cdf2082 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
                   outputColumnNames: key, value, ds, hr
                   Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: compute_stats(key, 16), compute_stats(value, 
16)
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
                     keys: ds (type: string), hr (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3
@@ -116,7 +116,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
                       Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
                       tag: -1
-                      value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                      value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
                       auto parallelism: false
             Filter Operator
               isSamplingPred: false
@@ -161,7 +161,7 @@ STAGE PLANS:
                   outputColumnNames: key, value, hr
                   Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: compute_stats(key, 16), compute_stats(value, 
16)
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
                     keys: '2008-12-31' (type: string), hr (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3
@@ -177,7 +177,7 @@ STAGE PLANS:
                           properties:
                             column.name.delimiter ,
                             columns _col0,_col1,_col2,_col3
-                            columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+                            columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
                             escape.delim \
                             serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                           serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -511,7 +511,7 @@ STAGE PLANS:
               Map-reduce partition columns: '2008-12-31' (type: string), _col1 
(type: string)
               Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
               tag: -1
-              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
               auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -524,7 +524,7 @@ STAGE PLANS:
             properties:
               column.name.delimiter ,
               columns _col0,_col1,_col2,_col3
-              columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+              columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
               escape.delim \
               serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -534,7 +534,7 @@ STAGE PLANS:
               properties:
                 column.name.delimiter ,
                 columns _col0,_col1,_col2,_col3
-                columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+                columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
                 escape.delim \
                 serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out 
b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
index d26e2c0..06f23b1 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
@@ -86,10 +86,10 @@ STAGE PLANS:
               outputColumnNames: key, value
               Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -142,10 +142,10 @@ STAGE PLANS:
                   outputColumnNames: key, value
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: compute_stats(key, 16), compute_stats(value, 
16)
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
                     mode: hash
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 944 Basic stats: 
COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
                       table:
@@ -181,17 +181,17 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+              Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -252,7 +252,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@dest_j1
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-key                    int                     0                       498     
                0                       196                                     
                                                                                
from deserializer   
+key                    int                     0                       498     
                0                       309                                     
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
                                                                        
 PREHOOK: query: desc formatted dest_j1 value
 PREHOOK: type: DESCTABLE
@@ -262,5 +262,5 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@dest_j1
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-value                  string                                                  
                0                       214                     
6.834630350194552       7                                                       
                from deserializer   
+value                  string                                                  
                0                       309                     
6.834630350194552       7                                                       
                from deserializer   
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
                                                                        

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out 
b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
index 17a912e..57f0067 100644
--- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
@@ -285,24 +285,24 @@ STAGE PLANS:
   Stage: Stage-14
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:c 
+        $hdt$_1:b 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:c 
+        $hdt$_1:b 
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: value is not null (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string)
+                expressions: key (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 _col1 (type: string)
+                    0 _col0 (type: string)
                     1 _col0 (type: string)
 
   Stage: Stage-9
@@ -322,7 +322,7 @@ STAGE PLANS:
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col1 (type: string)
+                    0 _col0 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
@@ -341,24 +341,24 @@ STAGE PLANS:
   Stage: Stage-12
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_2:b 
+        $hdt$_2:c 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_2:b 
+        $hdt$_2:c 
           TableScan
-            alias: b
+            alias: c
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
+              predicate: value is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: string)
+                expressions: value (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 _col0 (type: string)
+                    0 _col1 (type: string)
                     1 _col0 (type: string)
 
   Stage: Stage-6
@@ -369,7 +369,7 @@ STAGE PLANS:
               condition map:
                    Inner Join 0 to 1
               keys:
-                0 _col0 (type: string)
+                0 _col1 (type: string)
                 1 _col0 (type: string)
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE
@@ -411,20 +411,20 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
+            alias: c
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
+              predicate: value is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: string)
+                expressions: value (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col0 (type: string)
+                    0 _col1 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
@@ -449,19 +449,19 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
               sort order: +
-              Map-reduce partition columns: _col0 (type: string)
+              Map-reduce partition columns: _col1 (type: string)
               Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col1 (type: string)
+              value expressions: _col0 (type: string)
           TableScan
-            alias: b
+            alias: c
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: key is not null (type: boolean)
+              predicate: value is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: string)
+                expressions: value (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
@@ -474,7 +474,7 @@ STAGE PLANS:
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col0 (type: string)
+            0 _col1 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE
@@ -505,27 +505,27 @@ STAGE PLANS:
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 _col1 (type: string)
+                    0 _col0 (type: string)
                     1 _col0 (type: string)
 
   Stage: Stage-10
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: value is not null (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string)
+                expressions: key (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col1 (type: string)
+                    0 _col0 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
@@ -552,19 +552,19 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: string)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string)
+                  value expressions: _col1 (type: string)
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: value is not null (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string)
+                expressions: key (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
@@ -577,7 +577,7 @@ STAGE PLANS:
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col1 (type: string)
+            0 _col0 (type: string)
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/avro_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out 
b/ql/src/test/results/clientpositive/avro_decimal.q.out
index 5a3b72d..e1045eb 100644
--- a/ql/src/test/results/clientpositive/avro_decimal.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal.q.out
@@ -34,7 +34,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@dec
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-value                  decimal(8,4)            -12.25                  234.79  
                0                       6                                       
                                                                                
from deserializer   
+value                  decimal(8,4)            -12.25                  234.79  
                0                       10                                      
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  {\"COLUMN_STATS\":{\"value\":\"true\"}}                 
                                                 
 PREHOOK: query: DROP TABLE IF EXISTS avro_dec
 PREHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/avro_decimal_native.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out 
b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
index fe77512..b73b5f5 100644
--- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
@@ -38,7 +38,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@dec
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-value                  decimal(8,4)            -12.25                  234.79  
                0                       6                                       
                                                                                
from deserializer   
+value                  decimal(8,4)            -12.25                  234.79  
                0                       10                                      
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  {\"COLUMN_STATS\":{\"value\":\"true\"}}                 
                                                 
 PREHOOK: query: DROP TABLE IF EXISTS avro_dec
 PREHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out 
b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index f260f03..23f5fcf 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -129,13 +129,13 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: state, locid, $f2
-          Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column 
stats: PARTIAL
           Group By Operator
             aggregations: min(locid)
             keys: state (type: string), $f2 (type: bigint)
             mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE 
Column stats: PARTIAL
             File Output Operator
               compressed: false
               table:
@@ -151,7 +151,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: bigint)
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
bigint)
-              Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE 
Column stats: PARTIAL
               value expressions: _col2 (type: int)
       Reduce Operator Tree:
         Group By Operator
@@ -159,10 +159,10 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
           mode: mergepartial
           outputColumnNames: state, $f2, $f2_0
-          Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column 
stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out 
b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
index b9cf3ce..29499a1 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
@@ -68,14 +68,14 @@ STAGE PLANS:
             1 key (type: string)
             2 key (type: string)
           outputColumnNames: key, c_int, key0, c_int0
-          Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string), c_int (type: int), key0 (type: 
string), c_int0 (type: int)
             outputColumnNames: key, c_int, p, q
-            Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -730,14 +730,14 @@ STAGE PLANS:
             2 key (type: string)
             3 key (type: string)
           outputColumnNames: key, c_int, key0, c_int0, key1, c_int2
-          Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string), c_int (type: int), key0 (type: 
string), c_int0 (type: int), key1 (type: string), c_int2 (type: int)
             outputColumnNames: key, c_int, p, q, x, b
-            Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1620 Data size: 432273 Basic stats: 
COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1080 Data size: 288093 Basic stats: 
COMPLETE Column stats: COMPLETE
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/char_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out 
b/ql/src/test/results/clientpositive/char_udf1.q.out
index 07ce108..fefc740 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.out
@@ -393,15 +393,15 @@ POSTHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
 val_238        val_238 true
 PREHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from char_udf_1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from char_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@char_udf_1

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out 
b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
index 14c5d5b..0f28225 100644
--- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
+++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
@@ -43,7 +43,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@all_nulls
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-a                      bigint                  0                       0       
                5                       1                                       
                                                                                
from deserializer   
+a                      bigint                  0                       0       
                5                       0                                       
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
                                                                       
 PREHOOK: query: describe formatted all_nulls b
 PREHOOK: type: DESCTABLE
@@ -53,7 +53,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@all_nulls
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-b                      double                  0.0                     0.0     
                5                       1                                       
                                                                                
from deserializer   
+b                      double                  0.0                     0.0     
                5                       0                                       
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
                                                                       
 PREHOOK: query: drop table all_nulls
 PREHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out 
b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
index 96feeed..9925928 100644
--- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
+++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
@@ -108,7 +108,7 @@ STAGE PLANS:
               value expressions: key (type: int), value (type: string)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0, 16), 
compute_stats(VALUE._col2, 16)
+          aggregations: compute_stats(VALUE._col0, 'hll'), 
compute_stats(VALUE._col2, 'hll')
           mode: complete
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out 
b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index 07d26e9..5ecb205 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -52,7 +52,7 @@ STAGE PLANS:
               outputColumnNames: employeeid
               Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: 2000.0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -62,7 +62,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: 2000.0 (type: double)
                   Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0)
@@ -112,7 +112,7 @@ STAGE PLANS:
               outputColumnNames: employeeid
               Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: 2000.0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -124,7 +124,7 @@ STAGE PLANS:
                   Map-reduce partition columns: 2000.0 (type: double)
                   Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE
                   tag: -1
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
                   auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -254,7 +254,7 @@ STAGE PLANS:
               outputColumnNames: employeeid
               Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: 4000.0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -264,7 +264,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: 4000.0 (type: double)
                   Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0)
@@ -314,7 +314,7 @@ STAGE PLANS:
               outputColumnNames: employeeid
               Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: 4000.0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1
@@ -326,7 +326,7 @@ STAGE PLANS:
                   Map-reduce partition columns: 4000.0 (type: double)
                   Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE
                   tag: -1
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
                   auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -456,7 +456,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeename
               Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16), 
compute_stats(employeename, 16)
+                aggregations: compute_stats(employeeid, 'hll'), 
compute_stats(employeename, 'hll')
                 keys: 2000.0 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -466,7 +466,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: 2000.0 (type: double)
                   Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -511,7 +511,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 PREHOOK: query: describe formatted Employee_Part partition 
(employeeSalary=2000.0) employeeName
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@employee_part
@@ -520,7 +520,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeName           string                                                  
                1                       9                       
4.3076923076923075      6                                                       
                from deserializer   
+employeeName           string                                                  
                1                       12                      
4.3076923076923075      6                                                       
                from deserializer   
 PREHOOK: query: explain 
 analyze table Employee_Part  compute statistics for columns
 PREHOOK: type: QUERY
@@ -543,7 +543,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeename, employeesalary
               Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16), 
compute_stats(employeename, 16)
+                aggregations: compute_stats(employeeid, 'hll'), 
compute_stats(employeename, 'hll')
                 keys: employeesalary (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -553,7 +553,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: double)
                   Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -600,7 +600,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 PREHOOK: query: describe formatted Employee_Part 
partition(employeeSalary=4000.0) employeeID
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@employee_part
@@ -609,7 +609,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 PREHOOK: query: explain 
 analyze table Employee_Part  compute statistics for columns
 PREHOOK: type: QUERY
@@ -632,23 +632,23 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeename
               Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16), 
compute_stats(employeename, 16)
+                aggregations: compute_stats(employeeid, 'hll'), 
compute_stats(employeename, 'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -681,7 +681,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                2                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                2                       12                                      
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  
{\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}          
                                                         
 PREHOOK: query: create database if not exists dummydb
 PREHOOK: type: CREATEDATABASE
@@ -713,7 +713,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 COLUMN_STATS_ACCURATE  
{\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}          
                                                         
 PREHOOK: query: analyze table default.Employee_Part  compute statistics for 
columns
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out 
b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 468d2e7..a64c76b 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeename, country
               Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeename, 16), 
compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeename, 'hll'), 
compute_stats(employeeid, 'hll')
                 keys: 4000.0 (type: double), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -98,7 +98,7 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: 4000.0 (type: double), _col1 
(type: string)
                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -143,7 +143,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeName           string                                                  
                0                       6                       
5.142857142857143       6                                                       
                from deserializer   
+employeeName           string                                                  
                0                       7                       
5.142857142857143       6                                                       
                from deserializer   
 PREHOOK: query: explain        
 analyze table Employee_Part partition (employeeSalary='2000.0') compute 
statistics for columns employeeID
 PREHOOK: type: QUERY
@@ -166,7 +166,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, country
               Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: 2000.0 (type: double), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -176,7 +176,7 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: 2000.0 (type: double), _col1 
(type: string)
                   Statistics: Num rows: 42 Data size: 169 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0)
@@ -223,7 +223,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 PREHOOK: query: describe formatted Employee_Part partition 
(employeeSalary='2000.0', country='UK') employeeID
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@employee_part
@@ -232,7 +232,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      31      
                0                       9                                       
                                                                                
from deserializer   
+employeeID             int                     16                      31      
                0                       7                                       
                                                                                
from deserializer   
 PREHOOK: query: explain        
 analyze table Employee_Part partition (employeeSalary) compute statistics for 
columns employeeID
 PREHOOK: type: QUERY
@@ -255,7 +255,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeesalary, country
               Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16)
+                aggregations: compute_stats(employeeid, 'hll')
                 keys: employeesalary (type: double), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -265,7 +265,7 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: double), _col1 
(type: string)
                   Statistics: Num rows: 116 Data size: 466 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0)
@@ -320,7 +320,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeID             int                     16                      34      
                1                       14                                      
                                                                                
from deserializer   
+employeeID             int                     16                      34      
                1                       12                                      
                                                                                
from deserializer   
 PREHOOK: query: explain        
 analyze table Employee_Part partition (employeeSalary,country) compute 
statistics for columns
 PREHOOK: type: QUERY
@@ -343,7 +343,7 @@ STAGE PLANS:
               outputColumnNames: employeeid, employeename, employeesalary, 
country
               Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL 
Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(employeeid, 16), 
compute_stats(employeename, 16)
+                aggregations: compute_stats(employeeid, 'hll'), 
compute_stats(employeename, 'hll')
                 keys: employeesalary (type: double), country (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -353,7 +353,7 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: double), _col1 
(type: string)
                   Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -408,7 +408,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee_part
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeName           string                                                  
                0                       6                       
5.142857142857143       6                                                       
                from deserializer   
+employeeName           string                                                  
                0                       12                      
5.142857142857143       6                                                       
                from deserializer   
 PREHOOK: query: drop table Employee
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table Employee
@@ -483,7 +483,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeName           string                                                  
                0                       6                       
5.142857142857143       6                                                       
                from deserializer   
+employeeName           string                                                  
                0                       12                      
5.142857142857143       6                                                       
                from deserializer   
 PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO 
TABLE Employee partition(employeeSalary='3000.0', country='USA')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
@@ -530,7 +530,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-employeeName           string                                                  
                0                       6                       
5.142857142857143       6                                                       
                from deserializer   
+employeeName           string                                                  
                0                       12                      
5.142857142857143       6                                                       
                from deserializer   
 PREHOOK: query: alter table Employee add columns (c int ,d string)
 PREHOOK: type: ALTERTABLE_ADDCOLS
 PREHOOK: Input: default@employee
@@ -575,7 +575,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@employee
 # col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
                                                                                
 
-c                      int                     2000                    4000    
                0                       4                                       
                                                                                
from deserializer   
+c                      int                     2000                    4000    
                0                       3                                       
                                                                                
from deserializer   
 PREHOOK: query: describe formatted Employee partition 
(employeeSalary='6000.0', country='UK') d
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@employee

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out 
b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 52e3538..7e080fe 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -30,23 +30,23 @@ STAGE PLANS:
               outputColumnNames: user id, user name
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Group By Operator
-                aggregations: compute_stats(user id, 16), compute_stats(user 
name, 16)
+                aggregations: compute_stats(user id, 'hll'), 
compute_stats(user name, 'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -87,14 +87,14 @@ STAGE PLANS:
               outputColumnNames: user id
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Group By Operator
-                aggregations: compute_stats(user id, 16)
+                aggregations: compute_stats(user id, 'hll')
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0)

[18/24] hive git commit: HIVE-16996: Add HLL as an alternative to FM sketch to compute stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan, Prasanth Jayachandran)

Reply via email to