hive git commit: HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed Ashutosh Chauhan)

vgarg Mon, 28 Jan 2019 18:35:34 -0800

Repository: hive
Updated Branches:
  refs/heads/master 698206a29 -> 974708336



HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97470833
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97470833
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97470833

Branch: refs/heads/master
Commit: 974708336574dfb283a583ac1cebde27fc21b67a
Parents: 698206a
Author: Vineet Garg <vg...@apache.org>
Authored: Mon Jan 28 18:33:14 2019 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Mon Jan 28 18:33:14 2019 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../optimizer/SortedDynPartitionOptimizer.java  |   8 +-
 .../correlation/CorrelationUtilities.java       |   2 +-
 .../ReduceSinkDeDuplicationUtils.java           |  47 +++-
 .../queries/clientpositive/reducesink_dedup.q   |  12 +-
 .../llap/dynpart_sort_opt_vectorization.q.out   | 226 +++++++++++++++++--
 .../llap/dynpart_sort_optimization.q.out        | 106 ++++++++-
 .../llap/dynpart_sort_optimization2.q.out       |  92 ++++++--
 .../clientpositive/llap/reducesink_dedup.q.out  | 163 +++++++++++++
 .../clientpositive/reducesink_dedup.q.out       | 202 ++++++++++++++++-
 10 files changed, 791 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e0ea710..8ba0ddf 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -636,6 +636,7 @@ minillaplocal.query.files=\
   ptf_streaming.q,\
   runtime_stats_merge.q,\
   quotedid_smb.q,\
+  reducesink_dedup.q,\
   resourceplan.q,\
   results_cache_1.q,\
   results_cache_2.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 6fd1093..9708c47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -485,12 +485,10 @@ public class SortedDynPartitionOptimizer extends Transform {
       ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
       List<Integer> newSortOrder = Lists.newArrayList();
       List<Integer> newSortNullOrder = Lists.newArrayList();
-      int numPartAndBuck = partitionPositions.size();
 
       keyColsPosInVal.addAll(partitionPositions);
       if (bucketColumns != null && !bucketColumns.isEmpty()) {
         keyColsPosInVal.add(-1);
-        numPartAndBuck += 1;
       }
       keyColsPosInVal.addAll(sortPositions);
 
@@ -501,10 +499,9 @@ public class SortedDynPartitionOptimizer extends Transform {
           order = 0;
         }
       }
-      for (int i = 0; i < numPartAndBuck; i++) {
+      for (int i = 0; i < keyColsPosInVal.size(); i++) {
         newSortOrder.add(order);
       }
-      newSortOrder.addAll(sortOrder);
 
       String orderStr = "";
       for (Integer i : newSortOrder) {
@@ -525,10 +522,9 @@ public class SortedDynPartitionOptimizer extends Transform {
           nullOrder = 1;
         }
       }
-      for (int i = 0; i < numPartAndBuck; i++) {
+      for (int i = 0; i < keyColsPosInVal.size(); i++) {
         newSortNullOrder.add(nullOrder);
       }
-      newSortNullOrder.addAll(sortNullOrder);
 
       String nullOrderStr = "";
       for (Integer i : newSortNullOrder) {

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
index 6fcd8d2..c553dca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
@@ -387,7 +387,7 @@ public final class CorrelationUtilities {
     SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
 
     Operator<?> parent = getSingleParent(childRS);
-    parent.getChildOperators().clear();
+    parent.removeChild(childRS);
 
     SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
             select, new RowSchema(inputRS.getSignature()), parent);

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
index 7ccd4a3..6919da8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
@@ -475,6 +475,40 @@ public class ReduceSinkDeDuplicationUtils {
     return 0;
   }
 
+  // Check that in the path between cRS and pRS, there are only Select operators
+  // i.e. the sequence must be pRS-SEL*-cRS
+  // ensure SEL does not branch
+  protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (!(parent instanceof SelectOperator)) {
+        return false;
+      }
+      if (parent.getChildOperators().size() > 1) {
+        return false;
+      }
+
+      parent = parent.getParentOperators().get(0);
+    }
+    return true;
+  }
+
+  // Check that in the path between cRS and pRS, there are only single branch
+  // i.e. the sequence must be pRS-Op*-cRS
+  protected static boolean checkSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (parent.getChildOperators().size() > 1) {
+        return false;
+      }
+
+      parent = parent.getParentOperators().get(0);
+    }
+    return true;
+  }
+
   protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS,
           ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
     assert cRS.getNumParent() == 1;
@@ -484,15 +518,8 @@ public class ReduceSinkDeDuplicationUtils {
     List<ExprNodeDesc> cKeys = cConf.getKeyCols();
     List<ExprNodeDesc> pKeys = pConf.getKeyCols();
 
-    // Check that in the path between cRS and pRS, there are only Select operators
-    // i.e. the sequence must be pRS-SEL*-cRS
-    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
-    while (parent != pRS) {
-      assert parent.getNumParent() == 1;
-      if (!(parent instanceof SelectOperator)) {
-        return false;
-      }
-      parent = parent.getParentOperators().get(0);
+    if (!checkSelectSingleBranchOnly(cRS, pRS)) {
+      return false;
     }
 
     // If child keys are null or empty, we bail out
@@ -564,7 +591,7 @@ public class ReduceSinkDeDuplicationUtils {
 
     // Replace pRS with cRS and remove operator sequence from pRS to cRS
     // Recall that the sequence must be pRS-SEL*-cRS
-    parent = cRS.getParentOperators().get(0);
+    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
     while (parent != pRS) {
       dedupCtx.addRemovedOperator(parent);
       parent = parent.getParentOperators().get(0);

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/queries/clientpositive/reducesink_dedup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q
index 352a558..b7f9a01 100644
--- a/ql/src/test/queries/clientpositive/reducesink_dedup.q
+++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -1,5 +1,13 @@
 --! qt:dataset:part
-select p_name 
+--! qt:dataset:src
+select p_name
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
-;
\ No newline at end of file
+;
+
+create temporary table d1 (key int);
+create temporary table d2 (key int);
+
+explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index 53337fe..5a2cd47 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -170,6 +170,8 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -186,13 +188,61 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col4 (type: tinyint), _col0 (type: 
smallint)
-                        sort order: ++
-                        Map-reduce partition columns: _col4 (type: tinyint)
-                        value expressions: _col1 (type: int), _col2 (type: 
bigint), _col3 (type: float)
+                        key expressions: _col0 (type: smallint)
+                        sort order: +
+                        Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 
(type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 
(type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                  outputColumnNames: si, i, b, f, ds, t
+                  Statistics: Num rows: 11 Data size: 1221 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                    keys: ds (type: string), t (type: tinyint)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint), _col0 (type: 
smallint)
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col1 (type: int), _col2 (type: bigint), 
_col3 (type: float)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -616,6 +666,8 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -632,13 +684,61 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col4 (type: tinyint), _col0 (type: 
smallint)
-                        sort order: ++
-                        Map-reduce partition columns: _col4 (type: tinyint)
-                        value expressions: _col1 (type: int), _col2 (type: 
bigint), _col3 (type: float)
+                        key expressions: _col0 (type: smallint)
+                        sort order: +
+                        Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int), _col2 (type: 
bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 
(type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 
(type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                  outputColumnNames: si, i, b, f, ds, t
+                  Statistics: Num rows: 11 Data size: 1221 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                    keys: ds (type: string), t (type: tinyint)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint), _col0 (type: 
smallint)
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col1 (type: int), _col2 (type: bigint), 
_col3 (type: float)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -1566,6 +1666,8 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1582,13 +1684,61 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col4 (type: tinyint), _col1 (type: 
int)
-                        sort order: ++
-                        Map-reduce partition columns: _col4 (type: tinyint)
-                        value expressions: _col0 (type: smallint), _col2 
(type: bigint), _col3 (type: float)
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Statistics: Num rows: 11 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: smallint), _col2 
(type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 
(type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                  outputColumnNames: si, i, b, f, ds, t
+                  Statistics: Num rows: 11 Data size: 1221 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                    keys: ds (type: string), t (type: tinyint)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col0 (type: smallint), _col2 (type: 
bigint), _col3 (type: float)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -1909,6 +2059,7 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1933,7 +2084,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), 
KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float)
@@ -1944,15 +2095,54 @@ STAGE PLANS:
                   expressions: _col1 (type: smallint), _col2 (type: int), 
_col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                    outputColumnNames: si, i, b, f, ds, t
+                    Statistics: Num rows: 5 Data size: 555 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                      keys: ds (type: string), t (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 2 Data size: 3574 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                        Statistics: Num rows: 2 Data size: 3574 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    File Output Operator
+                      compressed: false
+                      Dp Sort State: PARTITION_SORTED
+                      Statistics: Num rows: 5 Data size: 120 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.over1k_part2_orc
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 2 Data size: 3702 Basic stats: 
COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Dp Sort State: PARTITION_SORTED
-                    Statistics: Num rows: 5 Data size: 120 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2 Data size: 3702 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.over1k_part2_orc
 
   Stage: Stage-2
     Dependency Collection

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
index cb3704f..14746aa 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
@@ -1597,6 +1597,8 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1613,16 +1615,64 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col4 (type: tinyint), _col1 (type: 
int)
-                        sort order: ++
-                        Map-reduce partition columns: _col4 (type: tinyint)
-                        value expressions: _col0 (type: smallint), _col2 
(type: bigint), _col3 (type: float)
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: smallint), _col2 
(type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
+                expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 
(type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                  outputColumnNames: si, i, b, f, ds, t
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                    keys: ds (type: string), t (type: tinyint)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col0 (type: smallint), _col2 (type: 
bigint), _col3 (type: float)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
                 expressions: VALUE._col0 (type: smallint), KEY._col1 (type: 
int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: 
tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 File Output Operator
@@ -1940,6 +1990,7 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1975,15 +2026,54 @@ STAGE PLANS:
                   expressions: _col1 (type: smallint), _col2 (type: int), 
_col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                    outputColumnNames: si, i, b, f, ds, t
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                      keys: ds (type: string), t (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                        Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    File Output Operator
+                      compressed: false
+                      Dp Sort State: PARTITION_SORTED
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.over1k_part2
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Dp Sort State: PARTITION_SORTED
                     Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                     table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.over1k_part2
 
   Stage: Stage-2
     Dependency Collection

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out 
b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
index 30074ab..6b2decd 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
@@ -111,7 +111,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int), KEY._col1 (type: float), 
KEY._col2 (type: float)
@@ -122,15 +122,39 @@ STAGE PLANS:
                   expressions: _col1 (type: float), _col2 (type: float), _col0 
(type: int)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Dp Sort State: PARTITION_SORTED
+                  Select Operator
+                    expressions: _col0 (type: float), _col1 (type: float), 
_col2 (type: int)
+                    outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, 
ss_sold_date_sk
                     Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.ss_part
+                    Group By Operator
+                      aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), 
compute_stats(ss_net_profit, 'hll')
+                      keys: ss_sold_date_sk (type: int)
+                      mode: complete
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col1 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Select Operator
+                    expressions: _col0 (type: float), _col1 (type: float), 
_col2 (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
+                      Dp Sort State: PARTITION_SORTED
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.ss_part
 
   Stage: Stage-2
     Dependency Collection
@@ -352,26 +376,52 @@ STAGE PLANS:
                       Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col2 (type: int)
-                        sort order: ++
+                        sort order: +
                         Map-reduce partition columns: _col2 (type: int)
-                        value expressions: _col0 (type: float), _col1 (type: 
float)
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: float), _col1 (type: 
float), _col2 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: float), VALUE._col1 (type: 
float), KEY._col2 (type: int)
+                expressions: VALUE._col0 (type: float), VALUE._col1 (type: 
float), VALUE._col2 (type: int)
                 outputColumnNames: _col0, _col1, _col2
-                File Output Operator
-                  compressed: false
-                  Dp Sort State: PARTITION_SORTED
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: float), _col1 (type: float), _col2 
(type: int)
+                  outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, 
ss_sold_date_sk
                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.ss_part
+                  Group By Operator
+                    aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), 
compute_stats(ss_net_profit, 'hll')
+                    keys: ss_sold_date_sk (type: int)
+                    mode: complete
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col1 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Select Operator
+                  expressions: _col0 (type: float), _col1 (type: float), _col2 
(type: int)
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    Dp Sort State: PARTITION_SORTED
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.ss_part
 
   Stage: Stage-2
     Dependency Collection

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out 
b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
new file mode 100644
index 0000000..fbc75ce
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
@@ -0,0 +1,163 @@
+PREHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p 
+distribute by 1 sort by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p 
+distribute by 1 sort by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond antique burnished rose metallic
+almond antique burnished rose metallic
+almond antique chartreuse lavender yellow
+almond antique salmon chartreuse burlywood
+almond aquamarine burnished black steel
+almond aquamarine pink moccasin thistle
+almond antique violet chocolate turquoise
+almond antique violet turquoise frosted
+almond aquamarine midnight light salmon
+almond aquamarine rose maroon antique
+almond aquamarine sandy cyan gainsboro
+almond antique chartreuse khaki white
+almond antique forest lavender goldenrod
+almond antique metallic orange dim
+almond antique misty red olive
+almond antique olive coral navajo
+almond antique gainsboro frosted violet
+almond antique violet mint lemon
+almond aquamarine floral ivory bisque
+almond aquamarine yellow dodger mint
+almond azure aquamarine papaya violet
+almond antique blue firebrick mint
+almond antique medium spring khaki
+almond antique sky peru orange
+almond aquamarine dodger light gainsboro
+almond azure blanched chiffon midnight
+PREHOOK: query: create temporary table d1 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d1
+POSTHOOK: query: create temporary table d1 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d1
+PREHOOK: query: create temporary table d2 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d2
+POSTHOOK: query: create temporary table d2 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d2
+PREHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@d1
+PREHOOK: Output: default@d2
+POSTHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@d1
+POSTHOOK: Output: default@d2
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-3
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.d1
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.d2
+
+  Stage: Stage-3
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d1
+
+  Stage: Stage-4
+    Stats Work
+      Basic Stats Work:
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d2
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+

http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out 
b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
index b89df52..2b068ac 100644
--- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out
+++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: select p_name 
+PREHOOK: query: select p_name
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
 #### A masked pattern was here ####
-POSTHOOK: query: select p_name 
+POSTHOOK: query: select p_name
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
 POSTHOOK: type: QUERY
@@ -36,3 +36,201 @@ almond antique medium spring khaki
 almond antique sky peru orange
 almond aquamarine dodger light gainsboro
 almond azure blanched chiffon midnight
+PREHOOK: query: create temporary table d1 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d1
+POSTHOOK: query: create temporary table d1 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d1
+PREHOOK: query: create temporary table d2 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d2
+POSTHOOK: query: create temporary table d2 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d2
+PREHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@d1
+PREHOOK: Output: default@d2
+POSTHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@d1
+POSTHOOK: Output: default@d2
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
+  Stage-1 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: UDFToInteger(_col0) (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.d1
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll')
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: UDFToInteger(_col0) (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.d2
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll')
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.d1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.d2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+

hive git commit: HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed Ashutosh Chauhan)

Reply via email to