[35/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

hashutosh Mon, 11 Dec 2017 16:00:23 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input_part2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_part2.q.out 
b/ql/src/test/results/clientpositive/input_part2.q.out
index 6942b23..c8ca522 100644
--- a/ql/src/test/results/clientpositive/input_part2.q.out
+++ b/ql/src/test/results/clientpositive/input_part2.q.out
@@ -29,17 +29,13 @@ STAGE DEPENDENCIES:
   Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
-  Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, 
Stage-12
-  Stage-11
-  Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
-  Stage-9 depends on stages: Stage-1
-  Stage-10
-  Stage-12
-  Stage-13 depends on stages: Stage-12
+  Stage-1 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-1, Stage-10
+  Stage-10 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -89,6 +85,22 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: true
                   MultiFileSpray: false
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '12' 
(type: string), '2008-04-08' (type: string)
+                  outputColumnNames: key, value, hr, ds
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1744 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      null sort order: 
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1744 Basic stats: 
COMPLETE Column stats: NONE
+                      tag: -1
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                      auto parallelism: false
             Filter Operator
               isSamplingPred: false
               predicate: ((ds = '2008-04-09') and (key < 100)) (type: boolean)
@@ -129,6 +141,33 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: true
                   MultiFileSpray: false
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '12' 
(type: string), '2008-04-09' (type: string)
+                  outputColumnNames: key, value, hr, ds
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1744 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            column.name.delimiter ,
+                            columns _col0,_col1,_col2,_col3
+                            columns.types 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+                            escape.delim \
+                            serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -231,6 +270,35 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /srcpart/ds=2008-04-08/hr=12 [srcpart]
         /srcpart/ds=2008-04-09/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE 
Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-8
     Conditional Operator
@@ -273,6 +341,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value, hr, ds
+          Column Types: int, string, string, string
+          Table: default.dest1
+          Is Table Level Stats: true
 
   Stage: Stage-4
     Map Reduce
@@ -454,15 +527,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-14
-    Conditional Operator
-
-  Stage: Stage-11
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -495,186 +559,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value, hr, ds
+          Column Types: int, string, string, string
+          Table: default.dest2
+          Is Table Level Stats: true
 
   Stage: Stage-10
     Map Reduce
       Map Operator Tree:
           TableScan
             GatherStats: false
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-#### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value,hr,ds
-                    columns.comments 
-                    columns.types int:string:string:string
-#### A masked pattern was here ####
-                    name default.dest2
-                    numFiles 0
-                    numRows 0
-                    rawDataSize 0
-                    serialization.ddl struct dest2 { i32 key, string value, 
string hr, string ds}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 0
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-              TotalFiles: 1
-              GatherStats: false
-              MultiFileSpray: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE 
Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
 #### A masked pattern was here ####
           Partition
-            base file name: -ext-10005
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            base file name: -mr-10005
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-              bucket_count -1
               column.name.delimiter ,
-              columns key,value,hr,ds
-              columns.comments 
-              columns.types int:string:string:string
-#### A masked pattern was here ####
-              name default.dest2
-              numFiles 0
-              numRows 0
-              rawDataSize 0
-              serialization.ddl struct dest2 { i32 key, string value, string 
hr, string ds}
-              serialization.format 1
-              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 0
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              columns _col0,_col1,_col2,_col3
+              columns.types 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-                bucket_count -1
                 column.name.delimiter ,
-                columns key,value,hr,ds
-                columns.comments 
-                columns.types int:string:string:string
-#### A masked pattern was here ####
-                name default.dest2
-                numFiles 0
-                numRows 0
-                rawDataSize 0
-                serialization.ddl struct dest2 { i32 key, string value, string 
hr, string ds}
-                serialization.format 1
-                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 0
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.dest2
-            name: default.dest2
+                columns _col0,_col1,_col2,_col3
+                columns.types 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Truncated Path -> Alias:
 #### A masked pattern was here ####
-
-  Stage: Stage-12
-    Map Reduce
-      Map Operator Tree:
-          TableScan
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE 
Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
             GatherStats: false
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-#### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-                    bucket_count -1
-                    column.name.delimiter ,
-                    columns key,value,hr,ds
-                    columns.comments 
-                    columns.types int:string:string:string
-#### A masked pattern was here ####
-                    name default.dest2
-                    numFiles 0
-                    numRows 0
-                    rawDataSize 0
-                    serialization.ddl struct dest2 { i32 key, string value, 
string hr, string ds}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 0
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-              TotalFiles: 1
-              GatherStats: false
-              MultiFileSpray: false
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            base file name: -ext-10005
-            input format: org.apache.hadoop.mapred.TextInputFormat
-            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-            properties:
-              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-              bucket_count -1
-              column.name.delimiter ,
-              columns key,value,hr,ds
-              columns.comments 
-              columns.types int:string:string:string
-#### A masked pattern was here ####
-              name default.dest2
-              numFiles 0
-              numRows 0
-              rawDataSize 0
-              serialization.ddl struct dest2 { i32 key, string value, string 
hr, string ds}
-              serialization.format 1
-              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 0
-#### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-          
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              properties:
-                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}}
-                bucket_count -1
-                column.name.delimiter ,
-                columns key,value,hr,ds
-                columns.comments 
-                columns.types int:string:string:string
-#### A masked pattern was here ####
-                name default.dest2
-                numFiles 0
-                numRows 0
-                rawDataSize 0
-                serialization.ddl struct dest2 { i32 key, string value, string 
hr, string ds}
-                serialization.format 1
-                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 0
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.dest2
-            name: default.dest2
-      Truncated Path -> Alias:
-#### A masked pattern was here ####
-
-  Stage: Stage-13
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            MultiFileSpray: false
 
 PREHOOK: query: FROM srcpart
 INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, 
srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr 
= '12'


http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input_part5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_part5.q.out 
b/ql/src/test/results/clientpositive/input_part5.q.out
index 7da77fb..33ecd59 100644
--- a/ql/src/test/results/clientpositive/input_part5.q.out
+++ b/ql/src/test/results/clientpositive/input_part5.q.out
@@ -46,6 +46,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.tmptable
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
+                  outputColumnNames: key, value, hr, ds
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1760 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1760 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -69,6 +95,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value, hr, ds
+          Column Types: string, string, string, string
+          Table: default.tmptable
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input_testsequencefile.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_testsequencefile.q.out 
b/ql/src/test/results/clientpositive/input_testsequencefile.q.out
index c248d03..847d045 100644
--- a/ql/src/test/results/clientpositive/input_testsequencefile.q.out
+++ b/ql/src/test/results/clientpositive/input_testsequencefile.q.out
@@ -43,6 +43,32 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest4_sequencefile
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -66,6 +92,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest4_sequencefile
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input_testxpath.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_testxpath.q.out 
b/ql/src/test/results/clientpositive/input_testxpath.q.out
index a68a500..ddda55e 100644
--- a/ql/src/test/results/clientpositive/input_testxpath.q.out
+++ b/ql/src/test/results/clientpositive/input_testxpath.q.out
@@ -43,6 +43,32 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string)
+                outputColumnNames: key, value, mapvalue
+                Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll'), compute_stats(mapvalue, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 1304 Basic stats: 
COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1304 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -66,6 +92,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value, mapvalue
+          Column Types: int, string, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input_testxpath2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_testxpath2.q.out 
b/ql/src/test/results/clientpositive/input_testxpath2.q.out
index ed45157..5c40743 100644
--- a/ql/src/test/results/clientpositive/input_testxpath2.q.out
+++ b/ql/src/test/results/clientpositive/input_testxpath2.q.out
@@ -46,6 +46,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int)
+                  outputColumnNames: lint_size, lintstring_size, 
mstringstring_size
+                  Statistics: Num rows: 11 Data size: 3070 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(lint_size, 'hll'), 
compute_stats(lintstring_size, 'hll'), compute_stats(mstringstring_size, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1272 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1272 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -69,6 +95,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: lint_size, lintstring_size, mstringstring_size
+          Column Types: int, int, int
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/insert1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert1.q.out 
b/ql/src/test/results/clientpositive/insert1.q.out
index aeb89eb..b3a635b 100644
--- a/ql/src/test/results/clientpositive/insert1.q.out
+++ b/ql/src/test/results/clientpositive/insert1.q.out
@@ -60,6 +60,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -83,6 +109,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert1
 
   Stage: Stage-3
     Map Reduce
@@ -150,6 +180,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -173,6 +229,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert1
 
   Stage: Stage-3
     Map Reduce
@@ -254,6 +314,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: x.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -277,6 +363,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: x.insert1
 
   Stage: Stage-3
     Map Reduce
@@ -344,6 +434,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -367,6 +483,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert1
 
   Stage: Stage-3
     Map Reduce
@@ -413,17 +533,13 @@ STAGE DEPENDENCIES:
   Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
-  Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, 
Stage-12
-  Stage-11
-  Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
-  Stage-9 depends on stages: Stage-1
-  Stage-10
-  Stage-12
-  Stage-13 depends on stages: Stage-12
+  Stage-1 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-1, Stage-10
+  Stage-10 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -447,6 +563,19 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Filter Operator
               predicate: ((key < 20) and (key > 10)) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
@@ -462,6 +591,34 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: x.insert1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
PARTIAL Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -485,6 +642,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert1
 
   Stage: Stage-4
     Map Reduce
@@ -516,15 +677,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-14
-    Conditional Operator
-
-  Stage: Stage-11
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -538,36 +690,32 @@ STAGE PLANS:
   Stage: Stage-9
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: x.insert1
 
   Stage: Stage-10
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: x.insert1
-
-  Stage: Stage-12
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: x.insert1
-
-  Stage: Stage-13
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: CREATE DATABASE db2
 PREHOOK: type: CREATEDATABASE

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out 
b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out
index a9378f8..b17bc11 100644
--- a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out
+++ b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out
@@ -41,7 +41,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -58,7 +59,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string), _col1 (type: string)
                 sort order: --
                 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
-                TopN Hash Memory Usage: 0.1
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 
(type: string)
@@ -75,6 +75,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.destintable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 
'2011-11-11' (type: string), '11' (type: string)
+              outputColumnNames: one, two, ds, hr
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(one, 'hll'), compute_stats(two, 
'hll')
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -92,6 +108,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: one, two
+          Column Types: string, string
+          Table: default.destintable
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', 
hr='11') if not exists
 SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one 
desc, two desc limit 5
@@ -175,7 +224,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -192,7 +242,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string), _col1 (type: string)
                 sort order: --
                 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
-                TopN Hash Memory Usage: 0.1
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 
(type: string)
@@ -209,6 +258,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.destintable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: one, two
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(one, 'hll'), compute_stats(two, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -223,6 +287,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: one, two
+          Column Types: string, string
+          Table: default.destintable
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM 
sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 
5
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out 
b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out
index c8bfdc6..77913f0 100644
--- a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out
+++ b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out
@@ -52,7 +52,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -69,7 +70,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string), _col1 (type: string)
                 sort order: --
                 Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE 
Column stats: NONE
-                TopN Hash Memory Usage: 0.1
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 
(type: string)
@@ -86,6 +86,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: db2.destintable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 
'2011-11-11' (type: string)
+              outputColumnNames: one, two, ds
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(one, 'hll'), compute_stats(two, 
'hll')
+                keys: ds (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -102,6 +118,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: one, two
+          Column Types: string, string
+          Table: db2.destintable
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION 
(ds='2011-11-11')
 SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, 
two desc limit 5
@@ -141,7 +190,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -158,7 +208,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string), _col1 (type: string)
                 sort order: --
                 Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE 
Column stats: NONE
-                TopN Hash Memory Usage: 0.1
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 
(type: string)
@@ -175,6 +224,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: db2.destintable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 
'2011-11-11' (type: string)
+              outputColumnNames: one, two, ds
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(one, 'hll'), compute_stats(two, 
'hll')
+                keys: ds (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -191,6 +256,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: one, two
+          Column Types: string, string
+          Table: db2.destintable
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION 
(ds='2011-11-11')
 SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, 
two desc limit 5

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/insert_into1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_into1.q.out 
b/ql/src/test/results/clientpositive/insert_into1.q.out
index 3d1438a..023a6fb 100644
--- a/ql/src/test/results/clientpositive/insert_into1.q.out
+++ b/ql/src/test/results/clientpositive/insert_into1.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -34,7 +35,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string)
                 sort order: +
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
                 value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
@@ -56,6 +56,21 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.insert_into1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 100 Data size: 1000 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -70,6 +85,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert_into1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key 
LIMIT 100
 PREHOOK: type: QUERY
@@ -126,7 +167,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -143,7 +185,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string)
                 sort order: +
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
                 value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
@@ -165,6 +206,21 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.insert_into1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 100 Data size: 1000 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -179,6 +235,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert_into1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key 
LIMIT 100
 PREHOOK: type: QUERY
@@ -235,7 +317,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -252,7 +335,6 @@ STAGE PLANS:
                 key expressions: _col0 (type: string)
                 sort order: +
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
                 value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
@@ -274,6 +356,21 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.insert_into1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -288,6 +385,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert_into1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY 
key LIMIT 10
 PREHOOK: type: QUERY
@@ -371,6 +494,32 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.insert_into1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -394,6 +543,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert_into1
 
   Stage: Stage-3
     Map Reduce
@@ -469,6 +622,32 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.insert_into1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -492,6 +671,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.insert_into1
 
   Stage: Stage-3
     Map Reduce

[35/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

Reply via email to