[8/8] hive git commit: HIVE-18875 : Enable SMB Join by default in Tez (Deepak Jaiswal, reviewed by Gunther Hagleitner)

djaiswal Sun, 10 Jun 2018 17:56:54 -0700

HIVE-18875 : Enable SMB Join by default in Tez (Deepak Jaiswal, reviewed by 
Gunther Hagleitner)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0a961aa8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0a961aa8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0a961aa8

Branch: refs/heads/master
Commit: 0a961aa8fd2187e901ec071ea9ddc3ab4d4970c5
Parents: 773034f
Author: Deepak Jaiswal <djais...@apache.org>
Authored: Sun Jun 10 00:25:34 2018 -0700
Committer: Deepak Jaiswal <djais...@apache.org>
Committed: Sun Jun 10 17:55:30 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +-
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/exec/CommonMergeJoinOperator.java   |  31 +-
 .../hadoop/hive/ql/exec/GroupByOperator.java    |   2 +-
 .../apache/hadoop/hive/ql/exec/Operator.java    |  12 +
 .../hive/ql/exec/tez/ReduceRecordSource.java    |  10 +
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |  17 +
 .../annotation/OpTraitsRulesProcFactory.java    |  41 +-
 .../clientpositive/auto_sortmerge_join_11.q     |   1 +
 .../clientpositive/auto_sortmerge_join_6.q      |   4 +-
 .../test/queries/clientpositive/skewjoinopt19.q |   1 +
 .../test/queries/clientpositive/skewjoinopt20.q |   1 +
 .../queries/clientpositive/smb_mapjoin_11.q     |   3 +-
 .../queries/clientpositive/smb_mapjoin_12.q     |   2 +-
 .../queries/clientpositive/smb_mapjoin_17.q     |   1 +
 .../queries/clientpositive/subquery_notin.q     |   2 +-
 .../llap/auto_sortmerge_join_6.q.out            | 150 ++--
 .../llap/correlationoptimizer2.q.out            | 450 +++++-------
 .../llap/correlationoptimizer6.q.out            | 170 ++---
 .../clientpositive/llap/explainuser_1.q.out     | 143 ++--
 .../clientpositive/llap/limit_pushdown.q.out    |  83 +--
 .../results/clientpositive/llap/mergejoin.q.out | 715 ++++---------------
 .../test/results/clientpositive/llap/mrr.q.out  | 132 ++--
 .../llap/offset_limit_ppd_optimizer.q.out       |  85 +--
 .../results/clientpositive/llap/smb_cache.q.out |  50 +-
 .../clientpositive/llap/smb_mapjoin_14.q.out    | 644 ++++++-----------
 .../clientpositive/llap/smb_mapjoin_15.q.out    | 101 +--
 .../clientpositive/llap/smb_mapjoin_4.q.out     | 544 ++++----------
 .../clientpositive/llap/smb_mapjoin_5.q.out     | 544 ++++----------
 .../clientpositive/llap/smb_mapjoin_6.q.out     | 415 ++++-------
 .../llap/subquery_in_having.q.out               | 160 ++---
 .../clientpositive/llap/subquery_notin.q.out    | 124 ++--
 .../llap/vectorized_bucketmapjoin1.q.out        | 307 ++------
 .../clientpositive/spark/bucketmapjoin1.q.out   | 210 ++----
 .../clientpositive/spark/smb_mapjoin_14.q.out   | 711 +++++-------------
 .../clientpositive/spark/smb_mapjoin_15.q.out   | 399 ++---------
 .../clientpositive/spark/smb_mapjoin_4.q.out    | 640 ++++-------------
 .../clientpositive/spark/smb_mapjoin_5.q.out    | 640 ++++-------------
 .../clientpositive/spark/smb_mapjoin_6.q.out    | 331 +++------
 .../clientpositive/spark/subquery_notin.q.out   |   8 +-
 40 files changed, 2329 insertions(+), 5558 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 87db42a..1e8a389 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2076,7 +2076,7 @@ public class HiveConf extends Configuration {
     HIVE_ENFORCE_NOT_NULL_CONSTRAINT("hive.constraint.notnull.enforce", true,
         "Should \"IS NOT NULL \" constraint be enforced?"),
 
-    HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false,
+    HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", true,
         "Will the join be automatically converted to a sort-merge join, if the 
joined tables pass the criteria for sort-merge join."),
     
HIVE_AUTO_SORTMERGE_JOIN_REDUCE("hive.auto.convert.sortmerge.join.reduce.side", 
true,
         "Whether hive.auto.convert.sortmerge.join (if enabled) should be 
applied to reduce side."),

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 0f13d62..c71c540 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -701,6 +701,7 @@ minillaplocal.query.files=\
   tez_smb_1.q,\
   tez_smb_empty.q,\
   tez_smb_main.q,\
+  tez_smb_reduce_side.q,\
   tez_union.q,\
   tez_union2.q,\
   tez_union_decimal.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index aefaa05..8f98ace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -25,6 +25,8 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
+
+import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -148,7 +150,7 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
 
     for (byte pos = 0; pos < order.length; pos++) {
       if (pos != posBigTable) {
-        if ((parentOperators != null) && (parentOperators.isEmpty() == false)
+        if ((parentOperators != null) && !parentOperators.isEmpty()
             && (parentOperators.get(pos) instanceof TezDummyStoreOperator)) {
           TezDummyStoreOperator dummyStoreOp = (TezDummyStoreOperator) 
parentOperators.get(pos);
           fetchDone[pos] = dummyStoreOp.getFetchDone();
@@ -161,6 +163,15 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
 
     sources = ((TezContext) MapredContext.get()).getRecordSources();
     interruptChecker = new InterruptibleProcessing();
+
+    if (sources[0] instanceof ReduceRecordSource &&
+        parentOperators != null && !parentOperators.isEmpty()) {
+      // Tell ReduceRecordSource to flush last record as this is a reduce
+      // side SMB
+      for (RecordSource source : sources) {
+        ((ReduceRecordSource) source).setFlushLastRecord(true);
+      }
+    }
   }
 
   /*
@@ -230,7 +241,7 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
             continue;
           }
 
-          if (foundNextKeyGroup[i] == false) {
+          if (!foundNextKeyGroup[i]) {
             canEmit = false;
             break;
           }
@@ -258,13 +269,12 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
     // catch up with the big table.
     if (nextKeyGroup) {
       assert tag == posBigTable;
-      List<Byte> smallestPos = null;
+      List<Byte> listOfFetchNeeded = null;
       do {
-        smallestPos = joinOneGroup();
+        listOfFetchNeeded = joinOneGroup();
         //jump out the loop if we need input from the big table
-      } while (smallestPos != null && smallestPos.size() > 0
-          && !smallestPos.contains(this.posBigTable));
-
+      } while (listOfFetchNeeded != null && listOfFetchNeeded.size() > 0
+          && !listOfFetchNeeded.contains(this.posBigTable));
       return;
     }
 
@@ -360,6 +370,9 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
   }
 
   private void fetchNextGroup(Byte t) throws HiveException {
+    if (keyWritables[t] != null) {
+      return; // First process the current key.
+    }
     if (foundNextKeyGroup[t]) {
       // first promote the next group to be the current group if we reached a
       // new group in the previous fetch
@@ -530,6 +543,10 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator<CommonMerge
     } else {
       int cmp = compareKeys(alias, key, keyWritable);
       if (cmp != 0) {
+        // Can't overwrite existing keys
+        if (nextKeyWritables[alias] != null) {
+          throw new HiveException("Attempting to overwrite nextKeyWritables[" 
+ alias + "]");
+        }
         nextKeyWritables[alias] = key;
         return true;
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index 4b76638..4882e61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -734,7 +734,7 @@ public class GroupByOperator extends Operator<GroupByDesc> 
implements IConfigure
   @Override
   public void process(Object row, int tag) throws HiveException {
     firstRow = false;
-    ObjectInspector rowInspector = inputObjInspectors[tag];
+    ObjectInspector rowInspector = inputObjInspectors[0];
     // Total number of input rows is needed for hash aggregation only
     if (hashAggr) {
       numRowsInput++;

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index 108bb57..4e9784d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -663,6 +663,18 @@ public abstract class Operator<T extends OperatorDesc> 
implements Serializable,C
   public void flush() throws HiveException {
   }
 
+  // Recursive flush to flush all the tree operators
+  public void flushRecursive() throws HiveException {
+    flush();
+    if (childOperators == null) {
+      return;
+    }
+
+    for (Operator<?> child : childOperators) {
+      child.flushRecursive();
+    }
+  }
+
   public void processGroup(int tag) throws HiveException {
     if (childOperators == null || childOperators.isEmpty()) {
       return;

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index fca783c..688fde8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -123,6 +123,9 @@ public class ReduceRecordSource implements RecordSource {
   private long vectorizedVertexNum;
   private int vectorizedTestingReducerBatchSize;
 
+  // Flush the last record when reader is out of records
+  private boolean flushLastRecord = false;
+
   void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc 
keyTableDesc,
       TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte 
tag,
       VectorizedRowBatchCtx batchContext, long vectorizedVertexNum,
@@ -254,6 +257,9 @@ public class ReduceRecordSource implements RecordSource {
 
     try {
       if (!reader.next()) {
+        if (flushLastRecord) {
+          reducer.flushRecursive();
+        }
         return false;
       }
 
@@ -508,4 +514,8 @@ public class ReduceRecordSource implements RecordSource {
   public ObjectInspector getObjectInspector() {
     return rowObjectInspector;
   }
+
+  public void setFlushLastRecord(boolean flushLastRecord) {
+    this.flushLastRecord = flushLastRecord;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 4019f13..9a7b1ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -608,6 +608,23 @@ public class ConvertJoinMapJoin implements NodeProcessor {
         return false;
       }
       ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
+      List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
+
+      // For SMB, the key column(s) in RS should be the same as bucket column(s) and sort column(s)
+      List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
+      List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
+      if (sortCols.size() != keyCols.size() || bucketCols.size() != 
keyCols.size()) {
+        return false;
+      }
+
+      // Check columns.
+      for (int i = 0; i < sortCols.size(); i++) {
+        ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
+        ExprNodeDesc bucketCol = 
rsOp.getColumnExprMap().get(bucketCols.get(i));
+        if (!(sortCol.isSame(keyCols.get(i)) && 
bucketCol.isSame(keyCols.get(i)))) {
+          return false;
+        }
+      }
 
       if 
(!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(),
 rsOp
           .getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
index 9e54465..dbcbbfd 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
@@ -260,23 +260,43 @@ public class OpTraitsRulesProcFactory {
 
   public static class SelectRule implements NodeProcessor {
 
+    boolean processSortCols = false;
+
+    // For bucket columns
+    // If all the columns match to the parent, put them in the bucket cols
+    // else, add empty list.
+    // For sort columns
+    // Keep the subset of all the columns as long as order is maintained.
     public List<List<String>> getConvertedColNames(
         List<List<String>> parentColNames, SelectOperator selOp) {
-      List<List<String>> listBucketCols = new ArrayList<List<String>>();
+      List<List<String>> listBucketCols = new ArrayList<>();
       if (selOp.getColumnExprMap() != null) {
         if (parentColNames != null) {
           for (List<String> colNames : parentColNames) {
-            List<String> bucketColNames = new ArrayList<String>();
+            List<String> bucketColNames = new ArrayList<>();
+            boolean found = false;
             for (String colName : colNames) {
               for (Entry<String, ExprNodeDesc> entry : 
selOp.getColumnExprMap().entrySet()) {
-                if (entry.getValue() instanceof ExprNodeColumnDesc) {
-                  if (((ExprNodeColumnDesc) 
(entry.getValue())).getColumn().equals(colName)) {
-                    bucketColNames.add(entry.getKey());
-                  }
+                if ((entry.getValue() instanceof ExprNodeColumnDesc) &&
+                    (((ExprNodeColumnDesc) 
(entry.getValue())).getColumn().equals(colName))) {
+                  bucketColNames.add(entry.getKey());
+                  found = true;
+                  break;
                 }
               }
+              if (!found) {
+                // Bail out on first missed column.
+                break;
+              }
+            }
+            if (!processSortCols && !found) {
+              // While processing bucket columns, at least one bucket column
+              // was missed. This results in a different bucketing scheme.
+              // Add an empty list.
+              listBucketCols.add(new ArrayList<>());
+            } else  {
+              listBucketCols.add(bucketColNames);
             }
-            listBucketCols.add(bucketColNames);
           }
         }
       }
@@ -300,6 +320,7 @@ public class OpTraitsRulesProcFactory {
         List<List<String>> parentSortColNames =
             selOp.getParentOperators().get(0).getOpTraits().getSortCols();
         if (parentSortColNames != null) {
+          processSortCols = true;
           listSortCols = getConvertedColNames(parentSortColNames, selOp);
         }
       }
@@ -309,7 +330,11 @@ public class OpTraitsRulesProcFactory {
       int bucketingVersion = -1;
       OpTraits parentOpTraits = 
selOp.getParentOperators().get(0).getOpTraits();
       if (parentOpTraits != null) {
-        numBuckets = parentOpTraits.getNumBuckets();
+        // If bucket columns are empty, then numBuckets must be set to -1.
+        if (listBucketCols != null &&
+            !(listBucketCols.isEmpty() || listBucketCols.get(0).isEmpty())) {
+          numBuckets = parentOpTraits.getNumBuckets();
+        }
         numReduceSinks = parentOpTraits.getNumReduceSinks();
         bucketingVersion = parentOpTraits.getBucketingVersion();
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q 
b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
index 7416eb0..76e615f 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
@@ -22,6 +22,7 @@ load data local inpath 
'../../data/files/auto_sortmerge_join/big/000003_0' INTO
 set hive.auto.convert.join=true;
 -- disable hash joins
 set hive.auto.convert.join.noconditionaltask.size=10;
+set hive.auto.convert.sortmerge.join=false;
 explain extended select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b 
ON a.key = b.key;
 select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q 
b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
index 551e5f7..0aeec0e 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
@@ -53,7 +53,7 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = 
b.key join src c on c.k
 explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join 
tbl3_n0 c on c.key = a.key;
 select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c 
on c.key = a.key;
 
--- A SMB join is being followed by a regular join on a bucketed table on a 
different key
+-- The join order ensures there is no SMB
 explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join 
tbl4 c on c.value = a.value;
 select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on 
c.value = a.value;
 
@@ -71,6 +71,6 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = 
b.key join src c on c.k
 explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join 
tbl3_n0 c on c.key = a.key;
 select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c 
on c.key = a.key;
 
--- A SMB join is being followed by a regular join on a bucketed table on a 
different key
+-- The join order ensures there is no SMB
 explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join 
tbl4 c on c.value = a.value;
 select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on 
c.value = a.value;

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt19.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoinopt19.q 
b/ql/src/test/queries/clientpositive/skewjoinopt19.q
index 02cadda..df8ab71 100644
--- a/ql/src/test/queries/clientpositive/skewjoinopt19.q
+++ b/ql/src/test/queries/clientpositive/skewjoinopt19.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.sortmerge.join=false;
 
 CREATE TABLE T1_n34(key STRING, val STRING)
 CLUSTERED BY (key) INTO 4 BUCKETS

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt20.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoinopt20.q 
b/ql/src/test/queries/clientpositive/skewjoinopt20.q
index 160e5b8..bebe007 100644
--- a/ql/src/test/queries/clientpositive/skewjoinopt20.q
+++ b/ql/src/test/queries/clientpositive/skewjoinopt20.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.sortmerge.join=false;
 
 CREATE TABLE T1_n103(key STRING, val STRING)
 CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q 
b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
index 6ce49b8..d0cea5b 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
@@ -8,7 +8,8 @@ set hive.cbo.enable=false;
 
 set hive.exec.reducers.max = 1;
 set hive.merge.mapfiles=false;
-set hive.merge.mapredfiles=false; 
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join=false;
 
 -- This test verifies that the output of a sort merge join on 2 partitions 
(one on each side of the join) is bucketed
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q 
b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
index 753e4d3..6f9ecab 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
@@ -21,7 +21,7 @@ INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '2') 
SELECT *
 INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '3') SELECT *;
 
 
-
+set hive.auto.convert.sortmerge.join=false;
 
 -- Create a bucketed table
 CREATE TABLE test_table3_n4 (key INT, value STRING) PARTITIONED BY (ds STRING) 
CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q 
b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
index d68f5f3..7454445 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
@@ -43,6 +43,7 @@ SELECT * FROM src WHERE key < 10;
 INSERT OVERWRITE TABLE test_table8
 SELECT * FROM src  WHERE key < 10;
 
+set hive.auto.convert.sortmerge.join=false;
 -- Mapjoin followed by a aggregation should be performed in a single MR job 
upto 7 tables
 EXPLAIN
 SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*)

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q 
b/ql/src/test/queries/clientpositive/subquery_notin.q
index 6494027..a2d93df 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -177,7 +177,7 @@ SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0);
 
 -- corr
 explain SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where 
t1_n0.c2=t2_n0.c1);
-SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where 
t1_n0.c1=t2_n0.c1);
+SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where 
t1_n0.c2=t2_n0.c1);
 
 DROP TABLE t1_n0;
 DROP TABLE t2_n0;

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out 
b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
index b13beab..828c6e1 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
@@ -772,23 +772,13 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
@@ -798,23 +788,36 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 814 Data size: 3256 Basic 
stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Map 5 
             Map Operator Tree:
                 TableScan
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
                   alias: b
                   Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
@@ -838,6 +841,22 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 Statistics: Num rows: 1343 Data size: 10744 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -850,7 +869,7 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
                     value expressions: _col0 (type: bigint)
-        Reducer 3 
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -884,7 +903,7 @@ POSTHOOK: Input: default@tbl1_n4
 POSTHOOK: Input: default@tbl2_n3
 POSTHOOK: Input: default@tbl4
 #### A masked pattern was here ####
-90
+2654
 PREHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key 
= b.key join src c on c.value = a.value
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON 
a.key = b.key join src c on c.value = a.value
@@ -1295,23 +1314,13 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
@@ -1321,23 +1330,36 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 814 Data size: 3256 Basic 
stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Map 5 
             Map Operator Tree:
                 TableScan
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
                   alias: b
                   Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
@@ -1361,6 +1383,22 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 814 Data size: 3256 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 Statistics: Num rows: 1343 Data size: 10744 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -1373,7 +1411,7 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
                     value expressions: _col0 (type: bigint)
-        Reducer 3 
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -1407,4 +1445,4 @@ POSTHOOK: Input: default@tbl1_n4
 POSTHOOK: Input: default@tbl2_n3
 POSTHOOK: Input: default@tbl4
 #### A masked pattern was here ####
-90
+2654

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out 
b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
index 8e17d95..879c999 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
@@ -21,10 +21,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -49,7 +47,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -72,45 +70,43 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 9 Data size: 1701 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 9 Data size: 1701 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -125,21 +121,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -189,10 +170,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -217,7 +196,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -240,45 +219,43 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 9 Data size: 1701 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 9 Data size: 1701 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -293,21 +270,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -357,10 +319,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -386,7 +346,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -406,45 +366,43 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Outer Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Left Outer Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -459,21 +417,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -523,10 +466,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -552,7 +493,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -572,45 +513,43 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Outer Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Left Outer Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 250 Data size: 24690 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -625,21 +564,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -689,10 +613,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -714,7 +636,7 @@ STAGE PLANS:
                       value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -737,8 +659,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -746,37 +667,36 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Right Outer Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Right Outer Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 12 Data size: 2078 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 12 Data size: 2078 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 5 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -791,21 +711,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -855,10 +760,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -880,7 +783,7 @@ STAGE PLANS:
                       value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -903,8 +806,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: vectorized, llap
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -912,37 +814,36 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Right Outer Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Right Outer Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 12 Data size: 2078 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
-                    mode: hash
+                  Select Operator
+                    expressions: hash(_col0) (type: int), hash(_col1) (type: 
int), hash(_col2) (type: int), hash(_col3) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
+                    Statistics: Num rows: 12 Data size: 2078 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col0), sum(_col1), sum(_col2), 
sum(_col3)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
-        Reducer 4 
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 5 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -957,21 +858,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out 
b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
index 9e424c2..1fcd6ed 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
@@ -2659,11 +2659,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
-        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2688,7 +2686,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -2710,7 +2708,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -2731,44 +2729,42 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Group By Operator
-                  aggregations: sum(_col1), sum(_col3)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col3
+                  Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col1), sum(_col3)
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 103 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
-        Reducer 4 
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 103 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -2783,7 +2779,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
-        Reducer 5 
+        Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
@@ -2805,21 +2801,6 @@ STAGE PLANS:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -2913,11 +2894,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
-        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2942,7 +2921,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -2964,7 +2943,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -2985,44 +2964,42 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
-        Reducer 3 
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Group By Operator
-                  aggregations: sum(_col1), sum(_col3)
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col3
+                  Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col1), sum(_col3)
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 103 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
-        Reducer 4 
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 103 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -3037,7 +3014,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
-        Reducer 5 
+        Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
@@ -3059,21 +3036,6 @@ STAGE PLANS:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator

[8/8] hive git commit: HIVE-18875 : Enable SMB Join by default in Tez (Deepak Jaiswal, reviewed by Gunther Hagleitner)

Reply via email to