[2/9] hive git commit: HIVE-20915: Make dynamic sort partition optimization available to HoS and MR (Yongzhi Chen, reviewed by Naveen Gangam)

ychena Thu, 06 Dec 2018 06:36:20 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
index 090e9cb..3ca922d 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
@@ -73,9 +73,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -87,14 +90,17 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string), 
ds (type: string), hr (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.nzhang_part1_n0
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string), _col3 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: 
string)
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (ds > '2008-04-08') (type: boolean)
                     Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
@@ -102,15 +108,42 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string), 
hr (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.nzhang_part2_n0
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: 
string)
+            Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part1_n0
+        Reducer 3 
             Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part2_n0
 
   Stage: Stage-0
     Move Operator


http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
index 1885f9c..5fd3a04 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
@@ -56,6 +56,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -68,15 +70,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string), hr 
(type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.nzhang_part10
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part10
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
index 4e90e95..625d60c9 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
@@ -58,6 +58,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -70,15 +72,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.nzhang_part3
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col3 (type: 
string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part3
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
index 2a0eddb..811becc 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
@@ -68,6 +68,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -80,15 +82,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.nzhang_part4
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col3 (type: 
string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part4
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
index 33815e0..8e9f813 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
@@ -37,6 +37,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -48,15 +50,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.nzhang_part5
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: string)
+                      value expressions: _col0 (type: string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY._col1 (type: 
string)
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part5
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
index b59189a..aebf438 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
@@ -65,9 +65,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -81,35 +84,225 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string), 
ds (type: string), hr (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string), _col3 (type: 
string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                        tag: -1
+                        value expressions: _col0 (type: string), _col1 (type: 
string)
+                        auto parallelism: false
+            Execution mode: vectorized
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
 #### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-                        Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
 #### A masked pattern was here ####
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              bucket_count -1
-                              column.name.delimiter ,
-                              columns key,value
-                              columns.comments 'default','default'
-                              columns.types string:string
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
 #### A masked pattern was here ####
-                              name default.nzhang_part8_n0
-                              partition_columns ds/hr
-                              partition_columns.types string:string
-                              serialization.ddl struct nzhang_part8_n0 { 
string key, string value}
-                              serialization.format 1
-                              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.nzhang_part8_n0
-                        TotalFiles: 1
-                        GatherStats: true
-                        MultiFileSpray: false
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+            Truncated Path -> Alias:
+              /srcpart/ds=2008-04-08/hr=11 [srcpart]
+              /srcpart/ds=2008-04-08/hr=12 [srcpart]
+              /srcpart/ds=2008-04-09/hr=11 [srcpart]
+              /srcpart/ds=2008-04-09/hr=12 [srcpart]
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                  GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (ds > '2008-04-08') (type: boolean)
@@ -118,36 +311,14 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string), 
hr (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 2
-#### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-                        Static Partition Specification: ds=2008-12-31/
-                        Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              bucket_count -1
-                              column.name.delimiter ,
-                              columns key,value
-                              columns.comments 'default','default'
-                              columns.types string:string
-#### A masked pattern was here ####
-                              name default.nzhang_part8_n0
-                              partition_columns ds/hr
-                              partition_columns.types string:string
-                              serialization.ddl struct nzhang_part8_n0 { 
string key, string value}
-                              serialization.format 1
-                              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.nzhang_part8_n0
-                        TotalFiles: 1
-                        GatherStats: true
-                        MultiFileSpray: false
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        tag: -1
+                        value expressions: _col0 (type: string), _col1 (type: 
string)
+                        auto parallelism: false
             Execution mode: vectorized
             Path -> Alias:
 #### A masked pattern was here ####
@@ -353,6 +524,81 @@ STAGE PLANS:
               /srcpart/ds=2008-04-08/hr=12 [srcpart]
               /srcpart/ds=2008-04-09/hr=11 [srcpart]
               /srcpart/ds=2008-04-09/hr=12 [srcpart]
+        Reducer 2 
+            Execution mode: vectorized
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+#### A masked pattern was here ####
+                  Dp Sort State: PARTITION_SORTED
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        bucket_count -1
+                        column.name.delimiter ,
+                        columns key,value
+                        columns.comments 'default','default'
+                        columns.types string:string
+#### A masked pattern was here ####
+                        name default.nzhang_part8_n0
+                        partition_columns ds/hr
+                        partition_columns.types string:string
+                        serialization.ddl struct nzhang_part8_n0 { string key, 
string value}
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part8_n0
+                  TotalFiles: 1
+                  GatherStats: true
+                  MultiFileSpray: false
+        Reducer 3 
+            Execution mode: vectorized
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 2
+#### A masked pattern was here ####
+                  Dp Sort State: PARTITION_SORTED
+                  NumFilesPerFileSink: 1
+                  Static Partition Specification: ds=2008-12-31/
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        bucket_count -1
+                        column.name.delimiter ,
+                        columns key,value
+                        columns.comments 'default','default'
+                        columns.types string:string
+#### A masked pattern was here ####
+                        name default.nzhang_part8_n0
+                        partition_columns ds/hr
+                        partition_columns.types string:string
+                        serialization.ddl struct nzhang_part8_n0 { string key, 
string value}
+                        serialization.format 1
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part8_n0
+                  TotalFiles: 1
+                  GatherStats: true
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out 
b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
index f93a255..b62acc4 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
@@ -56,6 +56,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -68,15 +70,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.nzhang_part9
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col3 (type: 
string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.nzhang_part9
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/orc_merge2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out 
b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out
index ae9750e..6d571b1 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out
@@ -40,6 +40,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -51,14 +53,25 @@ STAGE PLANS:
                     expressions: UDFToInteger(key) (type: int), value (type: 
string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orcfile_merge2a_n0
+                    Reduce Output Operator
+                      key expressions: _col2 (type: int), _col3 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: int), _col3 
(type: int)
+                      value expressions: _col0 (type: int), _col1 (type: 
string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: 
string), KEY._col2 (type: int), KEY._col3 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orcfile_merge2a_n0
 
   Stage: Stage-7
     Conditional Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/orc_merge7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out 
b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out
index 8ce1547..192f8c4 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out
@@ -37,6 +37,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -48,14 +50,25 @@ STAGE PLANS:
                     expressions: userid (type: bigint), string1 (type: 
string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: 
timestamp), subtype (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orc_merge5a_n0
+                    Reduce Output Operator
+                      key expressions: _col5 (type: double)
+                      sort order: +
+                      Map-reduce partition columns: _col5 (type: double)
+                      value expressions: _col0 (type: bigint), _col1 (type: 
string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: 
timestamp)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: 
string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), 
VALUE._col4 (type: timestamp), KEY._col5 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orc_merge5a_n0
 
   Stage: Stage-0
     Move Operator
@@ -208,6 +221,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -219,14 +234,25 @@ STAGE PLANS:
                     expressions: userid (type: bigint), string1 (type: 
string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: 
timestamp), subtype (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orc_merge5a_n0
+                    Reduce Output Operator
+                      key expressions: _col5 (type: double)
+                      sort order: +
+                      Map-reduce partition columns: _col5 (type: double)
+                      value expressions: _col0 (type: bigint), _col1 (type: 
string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: 
timestamp)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: 
string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), 
VALUE._col4 (type: timestamp), KEY._col5 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orc_merge5a_n0
 
   Stage: Stage-7
     Conditional Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out 
b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
index 6bca572..357cbfa 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
@@ -61,6 +61,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -72,14 +74,25 @@ STAGE PLANS:
                     expressions: UDFToInteger(key) (type: int), value (type: 
string), (hash(key) pmod 2) (type: int)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orcfile_merge1_n0
+                    Reduce Output Operator
+                      key expressions: _col2 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: int)
+                      value expressions: _col0 (type: int), _col1 (type: 
string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: 
string), KEY._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orcfile_merge1_n0
 
   Stage: Stage-0
     Move Operator
@@ -115,7 +128,7 @@ POSTHOOK: Lineage: orcfile_merge1_n0 
PARTITION(ds=1,part=0).key EXPRESSION [(src
 POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=0).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=1).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
-Found 2 items
+Found 1 items
 #### A masked pattern was here ####
 PREHOOK: query: EXPLAIN
     INSERT OVERWRITE TABLE orcfile_merge1b_n0 PARTITION (ds='1', part)
@@ -143,6 +156,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -154,14 +169,25 @@ STAGE PLANS:
                     expressions: UDFToInteger(key) (type: int), value (type: 
string), (hash(key) pmod 2) (type: int)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orcfile_merge1b_n0
+                    Reduce Output Operator
+                      key expressions: _col2 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: int)
+                      value expressions: _col0 (type: int), _col1 (type: 
string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: 
string), KEY._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orcfile_merge1b_n0
 
   Stage: Stage-7
     Conditional Operator
@@ -270,6 +296,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -281,14 +309,25 @@ STAGE PLANS:
                     expressions: UDFToInteger(key) (type: int), value (type: 
string), (hash(key) pmod 2) (type: int)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orcfile_merge1c_n0
+                    Reduce Output Operator
+                      key expressions: _col2 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: int)
+                      value expressions: _col0 (type: int), _col1 (type: 
string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: 
string), KEY._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orcfile_merge1c_n0
 
   Stage: Stage-7
     Conditional Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out 
b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
index 838888d..2330d9e 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
@@ -37,6 +37,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -48,14 +50,25 @@ STAGE PLANS:
                     expressions: userid (type: bigint), string1 (type: 
string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: 
timestamp), subtype (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                     Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                          name: default.orc_merge5a
+                    Reduce Output Operator
+                      key expressions: _col5 (type: double)
+                      sort order: +
+                      Map-reduce partition columns: _col5 (type: double)
+                      value expressions: _col0 (type: bigint), _col1 (type: 
string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: 
timestamp)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: 
string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), 
VALUE._col4 (type: timestamp), KEY._col5 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1 Data size: 22980 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.orc_merge5a
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/stats2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/stats2.q.out 
b/ql/src/test/results/clientpositive/spark/stats2.q.out
index 55d2cb3..30339ca 100644
--- a/ql/src/test/results/clientpositive/spark/stats2.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats2.q.out
@@ -30,6 +30,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -42,15 +44,27 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.analyze_t1
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col3 (type: 
string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
string)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.analyze_t1
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out 
b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
index ab250fe..1249138 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
@@ -50,6 +50,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -65,16 +67,13 @@ STAGE PLANS:
                       expressions: _col0 (type: string), UDFToLong(_col1) 
(type: bigint), _col2 (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 2 Data size: 600 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 2 Data size: 600 Basic stats: 
COMPLETE Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                            name: default.outputtbl1_n4
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: 
bigint)
             Execution mode: vectorized
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: inputtbl1_n3
@@ -87,15 +86,27 @@ STAGE PLANS:
                       expressions: _col0 (type: string), UDFToLong(_col1) 
(type: bigint), _col2 (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 2 Data size: 600 Basic stats: 
COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 2 Data size: 600 Basic stats: 
COMPLETE Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                            name: default.outputtbl1_n4
+                      Reduce Output Operator
+                        key expressions: _col2 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: string)
+                        value expressions: _col0 (type: string), _col1 (type: 
bigint)
             Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
bigint), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1_n4
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out 
b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
index d63819f..cbf37d7 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
@@ -429,7 +429,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 
(PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -465,36 +465,26 @@ STAGE PLANS:
                     expressions: _col0 (type: string), UDFToLong(_col1) (type: 
bigint), '2008-04-08' (type: string), _col2 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl3_n3
-        Reducer 4 
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string), _col3 (type: 
string)
+                      sort order: ++
+                      Map-reduce partition columns: _col2 (type: string), 
_col3 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: 
bigint)
+        Reducer 3 
             Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), VALUE._col2 (type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 1000
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToLong(_col1) (type: 
bigint), '2008-04-08' (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.TextInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl3_n3
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
bigint), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.outputtbl3_n3
 
   Stage: Stage-0
     Move Operator
@@ -567,7 +557,7 @@ Database:                   default
 Table:                 outputtbl3_n3            
 #### A masked pattern was here ####
 Partition Parameters:           
-       numFiles                2                   
+       numFiles                1                   
        totalSize               6812                
 #### A masked pattern was here ####
                 

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/stats2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats2.q.out 
b/ql/src/test/results/clientpositive/stats2.q.out
index af205c1..43c1238 100644
--- a/ql/src/test/results/clientpositive/stats2.q.out
+++ b/ql/src/test/results/clientpositive/stats2.q.out
@@ -39,15 +39,25 @@ STAGE PLANS:
               expressions: key (type: string), value (type: string), ds (type: 
string), hr (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.analyze_t1
+              Reduce Output Operator
+                key expressions: _col2 (type: string), _col3 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col2 (type: string), _col3 
(type: string)
+                value expressions: _col0 (type: string), _col1 (type: string)
       Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), 
KEY._col2 (type: string), KEY._col3 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          File Output Operator
+            compressed: false
+            Dp Sort State: PARTITION_SORTED
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.analyze_t1
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/9f2e8e61/ql/src/test/results/clientpositive/stats4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats4.q.out 
b/ql/src/test/results/clientpositive/stats4.q.out
index e1ca68f..b1edea1 100644
--- a/ql/src/test/results/clientpositive/stats4.q.out
+++ b/ql/src/test/results/clientpositive/stats4.q.out
@@ -56,16 +56,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
-  Stage-5
-  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0, Stage-10
-  Stage-4
-  Stage-6
-  Stage-7 depends on stages: Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-9 depends on stages: Stage-1, Stage-10
-  Stage-10 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
@@ -81,30 +76,11 @@ STAGE PLANS:
                 expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.nzhang_part1
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
-                  outputColumnNames: key, value, ds, hr
-                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
-                    keys: ds (type: string), hr (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                Reduce Output Operator
+                  key expressions: _col2 (type: string), _col3 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col2 (type: string), _col3 
(type: string)
+                  value expressions: _col0 (type: string), _col1 (type: string)
             Filter Operator
               predicate: (ds > '2008-04-08') (type: boolean)
               Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
@@ -114,55 +90,24 @@ STAGE PLANS:
                 Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.nzhang_part2
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
-                  outputColumnNames: key, value, hr
-                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
-                    keys: '2008-12-31' (type: string), hr (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
       Reduce Operator Tree:
-        Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
-          keys: KEY._col0 (type: string), KEY._col1 (type: string)
-          mode: mergepartial
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), 
KEY._col2 (type: string), KEY._col3 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-8
-    Conditional Operator
-
-  Stage: Stage-5
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+          File Output Operator
+            compressed: false
+            Dp Sort State: PARTITION_SORTED
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.nzhang_part1
 
   Stage: Stage-0
     Move Operator
@@ -189,31 +134,25 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.nzhang_part1
-
-  Stage: Stage-6
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.nzhang_part1
-
-  Stage: Stage-7
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              value expressions: _col0 (type: string), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), 
KEY._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          File Output Operator
+            compressed: false
+            Dp Sort State: PARTITION_SORTED
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.nzhang_part2
 
   Stage: Stage-1
     Move Operator
@@ -228,7 +167,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.nzhang_part2
 
-  Stage: Stage-9
+  Stage: Stage-5
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -236,36 +175,6 @@ STAGE PLANS:
           Column Types: string, string
           Table: default.nzhang_part2
 
-  Stage: Stage-10
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: '2008-12-31' (type: string), _col1 (type: 
string)
-              sort order: ++
-              Map-reduce partition columns: '2008-12-31' (type: string), _col1 
(type: string)
-              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
-          keys: '2008-12-31' (type: string), KEY._col1 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '2008-12-31' (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
 PREHOOK: query: from srcpart
 insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, 
hr where ds <= '2008-04-08'
 insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, 
value, hr where ds > '2008-04-08'

[2/9] hive git commit: HIVE-20915: Make dynamic sort partition optimization available to HoS and MR (Yongzhi Chen, reviewed by Naveen Gangam)

Reply via email to