HIVE-21007: Semi join + Union can lead to wrong plans (Vineet Garg, reviewed by 
Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9493dcfd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9493dcfd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9493dcfd

Branch: refs/heads/master
Commit: 9493dcfd4737c0fc57f5786646642d2ec2af9518
Parents: b42fdc2
Author: Vineet Garg <vg...@apache.org>
Authored: Mon Dec 10 09:23:22 2018 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Mon Dec 10 09:23:22 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/TezCompiler.java       |  56 ++++
 .../llap/dynamic_semijoin_reduction_3.q.out     |  73 +----
 .../clientpositive/perf/tez/cbo_query54.q.out   |   2 +-
 .../perf/tez/constraints/query54.q.out          | 251 ++++++++---------
 .../perf/tez/constraints/query8.q.out           | 138 +++++-----
 .../clientpositive/perf/tez/query54.q.out       | 271 +++++++++----------
 .../clientpositive/perf/tez/query8.q.out        | 138 +++++-----
 7 files changed, 458 insertions(+), 471 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 02cebdc..4b10e89 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -481,6 +481,12 @@ public class TezCompiler extends TaskCompiler {
       markSemiJoinForDPP(procCtx);
       perfLogger.PerfLogEnd(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");
 
+      // Remove any semi join edges from Union Op
+      perfLogger.PerfLogBegin(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER);
+      removeSemiJoinEdgesForUnion(procCtx);
+      perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER,
+                            "Remove any semi join edge between Union and RS");
+
       // Remove any parallel edge between semijoin and mapjoin.
       perfLogger.PerfLogBegin(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER);
       removeSemijoinsParallelToMapJoin(procCtx);
@@ -1314,6 +1320,56 @@ public class TezCompiler extends TaskCompiler {
   }
 
   /*
+   * Given an operator this method removes all semi join edges downstream 
(children) until it hits RS
+   */
+  private void removeSemiJoinEdges(Operator<?> op, OptimizeTezProcContext 
procCtx,
+                                   Map<ReduceSinkOperator, TableScanOperator> 
sjToRemove) throws SemanticException {
+    if(op instanceof ReduceSinkOperator && op.getNumChild() == 0) {
+      Map<ReduceSinkOperator, SemiJoinBranchInfo> sjMap = 
procCtx.parseContext.getRsToSemiJoinBranchInfo();
+      if(sjMap.get(op) != null) {
+        sjToRemove.put((ReduceSinkOperator)op, sjMap.get(op).getTsOp());
+      }
+    }
+
+    for(Operator<?> child:op.getChildOperators()) {
+      removeSemiJoinEdges(child, procCtx, sjToRemove);
+    }
+  }
+
+  private void removeSemiJoinEdgesForUnion(OptimizeTezProcContext procCtx) 
throws SemanticException{
+    // Get all the TS ops.
+    List<Operator<?>> topOps = new ArrayList<>();
+    topOps.addAll(procCtx.parseContext.getTopOps().values());
+    Set<Operator<?>> unionOps = new HashSet<>();
+
+    Map<ReduceSinkOperator, TableScanOperator> sjToRemove = new HashMap<>();
+    for (Operator<?> parent : topOps) {
+      Deque<Operator<?>> deque = new LinkedList<>();
+      deque.add(parent);
+      while (!deque.isEmpty()) {
+        Operator<?> op = deque.pollLast();
+        if (op instanceof UnionOperator && !unionOps.contains(op)) {
+          unionOps.add(op);
+          removeSemiJoinEdges(op, procCtx, sjToRemove);
+        }
+        deque.addAll(op.getChildOperators());
+      }
+    }
+    // remove sj
+    if (sjToRemove.size() > 0) {
+      for (Map.Entry<ReduceSinkOperator, TableScanOperator> entry : 
sjToRemove.entrySet()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Semijoin optimization with Union operator. Removing 
semijoin "
+                        + OperatorUtils.getOpNamePretty(entry.getKey()) + " - "
+                        + 
OperatorUtils.getOpNamePretty(sjToRemove.get(entry.getKey())));
+        }
+        GenTezUtils.removeBranch(entry.getKey());
+        GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, 
entry.getKey(), entry.getValue());
+      }
+    }
+  }
+
+  /*
    *  The algorithm looks at all the mapjoins in the operator pipeline until
    *  it hits RS Op and for each mapjoin examines if it has paralllel semijoin
    *  edge or dynamic partition pruning.

http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out 
b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
index cb3740d..ecf6323 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
@@ -596,15 +596,13 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 1 <- Union 2 (CONTAINS)
-        Map 10 <- Union 2 (CONTAINS)
-        Map 11 <- Reducer 9 (BROADCAST_EDGE)
-        Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
+        Map 9 <- Union 2 (CONTAINS)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 5 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 6 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
         Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
-        Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -626,24 +624,24 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=2)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: 
int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Map 10 
             Map Operator Tree:
                 TableScan
+                  alias: t
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: a (type: int)
+                    sort order: +
+                    Map-reduce partition columns: a (type: int)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: ROW__ID (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Map 9 
+            Map Operator Tree:
+                TableScan
                   alias: nonacidorctbl
                   filterExpr: (b > 0) (type: boolean)
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
@@ -660,37 +658,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=2)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: 
int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
-        Map 11 
-            Map Operator Tree:
-                TableScan
-                  alias: t
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (a BETWEEN 
DynamicValue(RS_10_nonacidorctbl__col0_min) AND 
DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, 
DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: a (type: int)
-                      sort order: +
-                      Map-reduce partition columns: a (type: int)
-                      Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: ROW__ID (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>)
-            Execution mode: vectorized, llap
-            LLAP IO: may be used (ACID table)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -878,18 +847,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 9 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=2)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: binary)
         Union 2 
             Vertex: Union 2
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
index fd5f6d7..50fa078 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
@@ -1,6 +1,6 @@
 Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, 
$hdt$_3]] in Stage 'Reducer 4' is a cross product
 Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, 
$hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 31' is a cross product
+Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 30' is a cross product
 Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with my_customers as (

http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out
index 7a0750e..8d10899 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out
@@ -133,9 +133,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 1 <- Reducer 24 (BROADCAST_EDGE)
-Map 18 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS)
-Map 25 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS)
+Map 18 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS)
+Map 24 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS)
 Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
 Reducer 12 <- Map 11 (SIMPLE_EDGE)
 Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE)
@@ -144,16 +143,15 @@ Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 17 
(CUSTOM_SIMPLE_EDGE)
 Reducer 16 <- Map 11 (SIMPLE_EDGE)
 Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE)
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE)
-Reducer 20 <- Map 26 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE)
-Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
-Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
+Reducer 20 <- Map 25 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE)
+Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 22 <- Map 28 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
 Reducer 23 <- Reducer 22 (SIMPLE_EDGE)
-Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE)
-Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE)
+Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
-Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE)
-Reducer 4 <- Map 30 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 32 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE)
+Reducer 4 <- Map 29 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
 Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE)
 Reducer 7 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
 Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
@@ -164,25 +162,25 @@ Stage-0
     limit:100
     Stage-1
       Reducer 10 vectorized
-      File Output Operator [FS_358]
-        Limit [LIM_357] (rows=1 width=16)
+      File Output Operator [FS_353]
+        Limit [LIM_352] (rows=1 width=16)
           Number of rows:100
-          Select Operator [SEL_356] (rows=1 width=16)
+          Select Operator [SEL_351] (rows=1 width=16)
             Output:["_col0","_col1","_col2"]
           <-Reducer 9 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_355]
-              Select Operator [SEL_354] (rows=1 width=16)
+            SHUFFLE [RS_350]
+              Select Operator [SEL_349] (rows=1 width=16)
                 Output:["_col0","_col1","_col2"]
-                Group By Operator [GBY_353] (rows=1 width=12)
+                Group By Operator [GBY_348] (rows=1 width=12)
                   
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                 <-Reducer 8 [SIMPLE_EDGE] vectorized
-                  SHUFFLE [RS_352]
+                  SHUFFLE [RS_347]
                     PartitionCols:_col0
-                    Group By Operator [GBY_351] (rows=1 width=12)
+                    Group By Operator [GBY_346] (rows=1 width=12)
                       
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                      Select Operator [SEL_350] (rows=1 width=116)
+                      Select Operator [SEL_345] (rows=1 width=116)
                         Output:["_col0"]
-                        Group By Operator [GBY_349] (rows=1 width=116)
+                        Group By Operator [GBY_344] (rows=1 width=116)
                           
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
                         <-Reducer 7 [SIMPLE_EDGE]
                           SHUFFLE [RS_118]
@@ -202,42 +200,42 @@ Stage-0
                                         Merge Join Operator [MERGEJOIN_277] 
(rows=25 width=4)
                                           Conds:(Right Outer),Output:["_col0"]
                                         <-Reducer 14 [CUSTOM_SIMPLE_EDGE] 
vectorized
-                                          PARTITION_ONLY_SHUFFLE [RS_340]
-                                            Group By Operator [GBY_339] 
(rows=25 width=4)
+                                          PARTITION_ONLY_SHUFFLE [RS_335]
+                                            Group By Operator [GBY_334] 
(rows=25 width=4)
                                               Output:["_col0"],keys:KEY._col0
                                             <-Map 11 [SIMPLE_EDGE] vectorized
-                                              SHUFFLE [RS_320]
+                                              SHUFFLE [RS_301]
                                                 PartitionCols:_col0
-                                                Group By Operator [GBY_317] 
(rows=25 width=4)
+                                                Group By Operator [GBY_298] 
(rows=25 width=4)
                                                   Output:["_col0"],keys:_col0
-                                                  Select Operator [SEL_314] 
(rows=50 width=12)
+                                                  Select Operator [SEL_295] 
(rows=50 width=12)
                                                     Output:["_col0"]
-                                                    Filter Operator [FIL_312] 
(rows=50 width=12)
+                                                    Filter Operator [FIL_293] 
(rows=50 width=12)
                                                       predicate:((d_moy = 3) 
and (d_year = 1999))
                                                       TableScan [TS_26] 
(rows=73049 width=12)
                                                         
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"]
                                         <-Reducer 17 [CUSTOM_SIMPLE_EDGE] 
vectorized
-                                          PARTITION_ONLY_SHUFFLE [RS_348]
-                                            Select Operator [SEL_347] (rows=1 
width=8)
-                                              Filter Operator [FIL_346] 
(rows=1 width=8)
+                                          PARTITION_ONLY_SHUFFLE [RS_343]
+                                            Select Operator [SEL_342] (rows=1 
width=8)
+                                              Filter Operator [FIL_341] 
(rows=1 width=8)
                                                 
predicate:(sq_count_check(_col0) <= 1)
-                                                Group By Operator [GBY_345] 
(rows=1 width=8)
+                                                Group By Operator [GBY_340] 
(rows=1 width=8)
                                                   
Output:["_col0"],aggregations:["count(VALUE._col0)"]
                                                 <-Reducer 16 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                  PARTITION_ONLY_SHUFFLE 
[RS_344]
-                                                    Group By Operator 
[GBY_343] (rows=1 width=8)
+                                                  PARTITION_ONLY_SHUFFLE 
[RS_339]
+                                                    Group By Operator 
[GBY_338] (rows=1 width=8)
                                                       
Output:["_col0"],aggregations:["count()"]
-                                                      Select Operator 
[SEL_342] (rows=25 width=4)
-                                                        Group By Operator 
[GBY_341] (rows=25 width=4)
+                                                      Select Operator 
[SEL_337] (rows=25 width=4)
+                                                        Group By Operator 
[GBY_336] (rows=25 width=4)
                                                           
Output:["_col0"],keys:KEY._col0
                                                         <-Map 11 [SIMPLE_EDGE] 
vectorized
-                                                          SHUFFLE [RS_321]
+                                                          SHUFFLE [RS_302]
                                                             PartitionCols:_col0
-                                                            Group By Operator 
[GBY_318] (rows=25 width=4)
+                                                            Group By Operator 
[GBY_299] (rows=25 width=4)
                                                               
Output:["_col0"],keys:_col0
-                                                              Select Operator 
[SEL_315] (rows=50 width=12)
+                                                              Select Operator 
[SEL_296] (rows=50 width=12)
                                                                 
Output:["_col0"]
-                                                                 Please refer 
to the previous Filter Operator [FIL_312]
+                                                                 Please refer 
to the previous Filter Operator [FIL_293]
                                     <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
                                       PARTITION_ONLY_SHUFFLE [RS_112]
                                         Select Operator [SEL_107] 
(rows=224732600 width=119)
@@ -245,41 +243,41 @@ Stage-0
                                           Merge Join Operator [MERGEJOIN_278] 
(rows=224732600 width=119)
                                             Conds:(Left 
Outer),Output:["_col2","_col4","_col7","_col13"]
                                           <-Reducer 12 [CUSTOM_SIMPLE_EDGE] 
vectorized
-                                            PARTITION_ONLY_SHUFFLE [RS_324]
-                                              Group By Operator [GBY_322] 
(rows=25 width=4)
+                                            PARTITION_ONLY_SHUFFLE [RS_305]
+                                              Group By Operator [GBY_303] 
(rows=25 width=4)
                                                 Output:["_col0"],keys:KEY._col0
                                               <-Map 11 [SIMPLE_EDGE] vectorized
-                                                SHUFFLE [RS_319]
+                                                SHUFFLE [RS_300]
                                                   PartitionCols:_col0
-                                                  Group By Operator [GBY_316] 
(rows=25 width=4)
+                                                  Group By Operator [GBY_297] 
(rows=25 width=4)
                                                     Output:["_col0"],keys:_col0
-                                                    Select Operator [SEL_313] 
(rows=50 width=12)
+                                                    Select Operator [SEL_294] 
(rows=50 width=12)
                                                       Output:["_col0"]
-                                                       Please refer to the 
previous Filter Operator [FIL_312]
+                                                       Please refer to the 
previous Filter Operator [FIL_293]
                                           <-Reducer 5 [CUSTOM_SIMPLE_EDGE]
                                             PARTITION_ONLY_SHUFFLE [RS_104]
                                               Merge Join Operator 
[MERGEJOIN_276] (rows=8989304 width=8)
                                                 
Conds:RS_101._col5=RS_102._col0(Inner),Output:["_col2","_col4","_col7"]
-                                              <-Reducer 32 [SIMPLE_EDGE]
+                                              <-Reducer 31 [SIMPLE_EDGE]
                                                 SHUFFLE [RS_102]
                                                   PartitionCols:_col0
                                                   Merge Join Operator 
[MERGEJOIN_275] (rows=39720279 width=4)
-                                                    Conds:RS_335._col1, 
_col2=RS_338._col0, _col1(Inner),Output:["_col0"]
-                                                  <-Map 31 [SIMPLE_EDGE] 
vectorized
-                                                    SHUFFLE [RS_335]
+                                                    Conds:RS_330._col1, 
_col2=RS_333._col0, _col1(Inner),Output:["_col0"]
+                                                  <-Map 30 [SIMPLE_EDGE] 
vectorized
+                                                    SHUFFLE [RS_330]
                                                       PartitionCols:_col1, 
_col2
-                                                      Select Operator 
[SEL_334] (rows=40000000 width=188)
+                                                      Select Operator 
[SEL_329] (rows=40000000 width=188)
                                                         
Output:["_col0","_col1","_col2"]
-                                                        Filter Operator 
[FIL_333] (rows=40000000 width=188)
+                                                        Filter Operator 
[FIL_328] (rows=40000000 width=188)
                                                           predicate:(ca_county 
is not null and ca_state is not null)
                                                           TableScan [TS_74] 
(rows=40000000 width=188)
                                                             
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"]
-                                                  <-Map 33 [SIMPLE_EDGE] 
vectorized
-                                                    SHUFFLE [RS_338]
+                                                  <-Map 32 [SIMPLE_EDGE] 
vectorized
+                                                    SHUFFLE [RS_333]
                                                       PartitionCols:_col0, 
_col1
-                                                      Select Operator 
[SEL_337] (rows=1704 width=184)
+                                                      Select Operator 
[SEL_332] (rows=1704 width=184)
                                                         
Output:["_col0","_col1"]
-                                                        Filter Operator 
[FIL_336] (rows=1704 width=184)
+                                                        Filter Operator 
[FIL_331] (rows=1704 width=184)
                                                           predicate:(s_county 
is not null and s_state is not null)
                                                           TableScan [TS_77] 
(rows=1704 width=184)
                                                             
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"]
@@ -287,11 +285,11 @@ Stage-0
                                                 SHUFFLE [RS_101]
                                                   PartitionCols:_col5
                                                   Merge Join Operator 
[MERGEJOIN_274] (rows=8989304 width=12)
-                                                    
Conds:RS_98._col0=RS_332._col0(Inner),Output:["_col2","_col4","_col5","_col7"]
-                                                  <-Map 30 [SIMPLE_EDGE] 
vectorized
-                                                    SHUFFLE [RS_332]
+                                                    
Conds:RS_98._col0=RS_327._col0(Inner),Output:["_col2","_col4","_col5","_col7"]
+                                                  <-Map 29 [SIMPLE_EDGE] 
vectorized
+                                                    SHUFFLE [RS_327]
                                                       PartitionCols:_col0
-                                                      Select Operator 
[SEL_331] (rows=73049 width=8)
+                                                      Select Operator 
[SEL_326] (rows=73049 width=8)
                                                         
Output:["_col0","_col1"]
                                                         TableScan [TS_72] 
(rows=73049 width=8)
                                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"]
@@ -299,11 +297,37 @@ Stage-0
                                                     SHUFFLE [RS_98]
                                                       PartitionCols:_col0
                                                       Merge Join Operator 
[MERGEJOIN_273] (rows=8989304 width=8)
-                                                        
Conds:RS_95._col1=RS_303._col0(Inner),Output:["_col0","_col2","_col4","_col5"]
+                                                        
Conds:RS_95._col1=RS_325._col0(Inner),Output:["_col0","_col2","_col4","_col5"]
+                                                      <-Reducer 2 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_95]
+                                                          PartitionCols:_col1
+                                                          Merge Join Operator 
[MERGEJOIN_269] (rows=525327388 width=114)
+                                                            
Conds:(Inner),Output:["_col0","_col1","_col2"]
+                                                          <-Map 1 
[CUSTOM_SIMPLE_EDGE] vectorized
+                                                            
PARTITION_ONLY_SHUFFLE [RS_292]
+                                                              Select Operator 
[SEL_291] (rows=525327388 width=114)
+                                                                
Output:["_col0","_col1","_col2"]
+                                                                Filter 
Operator [FIL_290] (rows=525327388 width=114)
+                                                                  
predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+                                                                  TableScan 
[TS_23] (rows=575995635 width=114)
+                                                                    
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"]
+                                                          <-Reducer 13 
[CUSTOM_SIMPLE_EDGE] vectorized
+                                                            
PARTITION_ONLY_SHUFFLE [RS_311]
+                                                              Select Operator 
[SEL_310] (rows=1 width=8)
+                                                                Filter 
Operator [FIL_309] (rows=1 width=8)
+                                                                  
predicate:(sq_count_check(_col0) <= 1)
+                                                                  Group By 
Operator [GBY_308] (rows=1 width=8)
+                                                                    
Output:["_col0"],aggregations:["count(VALUE._col0)"]
+                                                                  <-Reducer 12 
[CUSTOM_SIMPLE_EDGE] vectorized
+                                                                    
PARTITION_ONLY_SHUFFLE [RS_307]
+                                                                      Group By 
Operator [GBY_306] (rows=1 width=8)
+                                                                        
Output:["_col0"],aggregations:["count()"]
+                                                                        Select 
Operator [SEL_304] (rows=25 width=4)
+                                                                           
Please refer to the previous Group By Operator [GBY_303]
                                                       <-Reducer 23 
[SIMPLE_EDGE] vectorized
-                                                        SHUFFLE [RS_303]
+                                                        SHUFFLE [RS_325]
                                                           PartitionCols:_col0
-                                                          Group By Operator 
[GBY_302] (rows=55046 width=8)
+                                                          Group By Operator 
[GBY_324] (rows=55046 width=8)
                                                             
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                                                           <-Reducer 22 
[SIMPLE_EDGE]
                                                             SHUFFLE [RS_69]
@@ -311,13 +335,13 @@ Stage-0
                                                               Group By 
Operator [GBY_68] (rows=55046 width=8)
                                                                 
Output:["_col0","_col1"],keys:_col5, _col6
                                                                 Merge Join 
Operator [MERGEJOIN_272] (rows=110092 width=8)
-                                                                  
Conds:RS_64._col1=RS_301._col0(Inner),Output:["_col5","_col6"]
-                                                                <-Map 29 
[SIMPLE_EDGE] vectorized
-                                                                  SHUFFLE 
[RS_301]
+                                                                  
Conds:RS_64._col1=RS_323._col0(Inner),Output:["_col5","_col6"]
+                                                                <-Map 28 
[SIMPLE_EDGE] vectorized
+                                                                  SHUFFLE 
[RS_323]
                                                                     
PartitionCols:_col0
-                                                                    Select 
Operator [SEL_300] (rows=80000000 width=8)
+                                                                    Select 
Operator [SEL_322] (rows=80000000 width=8)
                                                                       
Output:["_col0","_col1"]
-                                                                      Filter 
Operator [FIL_299] (rows=80000000 width=8)
+                                                                      Filter 
Operator [FIL_321] (rows=80000000 width=8)
                                                                         
predicate:c_current_addr_sk is not null
                                                                         
TableScan [TS_55] (rows=80000000 width=8)
                                                                           
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"]
@@ -325,13 +349,13 @@ Stage-0
                                                                   SHUFFLE 
[RS_64]
                                                                     
PartitionCols:_col1
                                                                     Merge Join 
Operator [MERGEJOIN_271] (rows=110092 width=0)
-                                                                      
Conds:RS_61._col2=RS_298._col0(Inner),Output:["_col1"]
-                                                                    <-Map 28 
[SIMPLE_EDGE] vectorized
-                                                                      SHUFFLE 
[RS_298]
+                                                                      
Conds:RS_61._col2=RS_320._col0(Inner),Output:["_col1"]
+                                                                    <-Map 27 
[SIMPLE_EDGE] vectorized
+                                                                      SHUFFLE 
[RS_320]
                                                                         
PartitionCols:_col0
-                                                                        Select 
Operator [SEL_297] (rows=453 width=4)
+                                                                        Select 
Operator [SEL_319] (rows=453 width=4)
                                                                           
Output:["_col0"]
-                                                                          
Filter Operator [FIL_296] (rows=453 width=186)
+                                                                          
Filter Operator [FIL_318] (rows=453 width=186)
                                                                             
predicate:((i_category = 'Jewelry') and (i_class = 'consignment'))
                                                                             
TableScan [TS_52] (rows=462000 width=186)
                                                                               
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"]
@@ -339,84 +363,47 @@ Stage-0
                                                                       SHUFFLE 
[RS_61]
                                                                         
PartitionCols:_col2
                                                                         Merge 
Join Operator [MERGEJOIN_270] (rows=11665117 width=7)
-                                                                          
Conds:Union 19._col0=RS_292._col0(Inner),Output:["_col1","_col2"]
-                                                                        <-Map 
26 [SIMPLE_EDGE] vectorized
-                                                                          
PARTITION_ONLY_SHUFFLE [RS_292]
+                                                                          
Conds:Union 19._col0=RS_314._col0(Inner),Output:["_col1","_col2"]
+                                                                        <-Map 
25 [SIMPLE_EDGE] vectorized
+                                                                          
PARTITION_ONLY_SHUFFLE [RS_314]
                                                                             
PartitionCols:_col0
-                                                                            
Select Operator [SEL_291] (rows=50 width=4)
+                                                                            
Select Operator [SEL_313] (rows=50 width=4)
                                                                               
Output:["_col0"]
-                                                                              
Filter Operator [FIL_290] (rows=50 width=12)
+                                                                              
Filter Operator [FIL_312] (rows=50 width=12)
                                                                                
 predicate:((d_moy = 3) and (d_year = 1999))
                                                                                
 TableScan [TS_49] (rows=73049 width=12)
                                                                                
   
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
                                                                         
<-Union 19 [SIMPLE_EDGE]
                                                                           
<-Map 18 [CONTAINS] vectorized
-                                                                            
Reduce Output Operator [RS_364]
+                                                                            
Reduce Output Operator [RS_359]
                                                                               
PartitionCols:_col0
-                                                                              
Select Operator [SEL_363] (rows=285117831 width=11)
+                                                                              
Select Operator [SEL_358] (rows=285117831 width=11)
                                                                                
 Output:["_col0","_col1","_col2"]
-                                                                               
 Filter Operator [FIL_362] (rows=285117831 width=11)
+                                                                               
 Filter Operator [FIL_357] (rows=285117831 width=11)
                                                                                
   predicate:((cs_sold_date_sk BETWEEN 
DynamicValue(RS_59_date_dim_d_date_sk_min) AND 
DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk 
is not null and cs_sold_date_sk is not null)
                                                                                
   TableScan [TS_280] (rows=287989836 width=11)
                                                                                
     Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"]
-                                                                               
   <-Reducer 27 [BROADCAST_EDGE] vectorized
-                                                                               
     BROADCAST [RS_360]
-                                                                               
       Group By Operator [GBY_359] (rows=1 width=12)
+                                                                               
   <-Reducer 26 [BROADCAST_EDGE] vectorized
+                                                                               
     BROADCAST [RS_355]
+                                                                               
       Group By Operator [GBY_354] (rows=1 width=12)
                                                                                
         
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                                                               
       <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                               
         PARTITION_ONLY_SHUFFLE [RS_295]
-                                                                               
           Group By Operator [GBY_294] (rows=1 width=12)
+                                                                               
       <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                                               
         PARTITION_ONLY_SHUFFLE [RS_317]
+                                                                               
           Group By Operator [GBY_316] (rows=1 width=12)
                                                                                
             
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                                                               
             Select Operator [SEL_293] (rows=50 width=4)
+                                                                               
             Select Operator [SEL_315] (rows=50 width=4)
                                                                                
               Output:["_col0"]
-                                                                               
                Please refer to the previous Select Operator [SEL_291]
-                                                                          
<-Map 25 [CONTAINS] vectorized
-                                                                            
Reduce Output Operator [RS_367]
+                                                                               
                Please refer to the previous Select Operator [SEL_313]
+                                                                          
<-Map 24 [CONTAINS] vectorized
+                                                                            
Reduce Output Operator [RS_362]
                                                                               
PartitionCols:_col0
-                                                                              
Select Operator [SEL_366] (rows=143930993 width=11)
+                                                                              
Select Operator [SEL_361] (rows=143930993 width=11)
                                                                                
 Output:["_col0","_col1","_col2"]
-                                                                               
 Filter Operator [FIL_365] (rows=143930993 width=11)
+                                                                               
 Filter Operator [FIL_360] (rows=143930993 width=11)
                                                                                
   predicate:((ws_sold_date_sk BETWEEN 
DynamicValue(RS_59_date_dim_d_date_sk_min) AND 
DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk 
is not null and ws_sold_date_sk is not null)
                                                                                
   TableScan [TS_285] (rows=144002668 width=11)
                                                                                
     Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"]
-                                                                               
   <-Reducer 27 [BROADCAST_EDGE] vectorized
-                                                                               
     BROADCAST [RS_361]
-                                                                               
        Please refer to the previous Group By Operator [GBY_359]
-                                                      <-Reducer 2 [SIMPLE_EDGE]
-                                                        SHUFFLE [RS_95]
-                                                          PartitionCols:_col1
-                                                          Merge Join Operator 
[MERGEJOIN_269] (rows=525327388 width=114)
-                                                            
Conds:(Inner),Output:["_col0","_col1","_col2"]
-                                                          <-Map 1 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                            
PARTITION_ONLY_SHUFFLE [RS_311]
-                                                              Select Operator 
[SEL_310] (rows=525327388 width=114)
-                                                                
Output:["_col0","_col1","_col2"]
-                                                                Filter 
Operator [FIL_309] (rows=525327388 width=114)
-                                                                  
predicate:((ss_customer_sk BETWEEN 
DynamicValue(RS_96_customer_c_customer_sk_min) AND 
DynamicValue(RS_96_customer_c_customer_sk_max) and 
in_bloom_filter(ss_customer_sk, 
DynamicValue(RS_96_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is 
not null and ss_sold_date_sk is not null)
-                                                                  TableScan 
[TS_23] (rows=575995635 width=114)
-                                                                    
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"]
-                                                                  <-Reducer 24 
[BROADCAST_EDGE] vectorized
-                                                                    BROADCAST 
[RS_308]
-                                                                      Group By 
Operator [GBY_307] (rows=1 width=12)
-                                                                        
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                                                      
<-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                        
SHUFFLE [RS_306]
-                                                                          
Group By Operator [GBY_305] (rows=1 width=12)
-                                                                            
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                                                            
Select Operator [SEL_304] (rows=55046 width=4)
-                                                                              
Output:["_col0"]
-                                                                               
Please refer to the previous Group By Operator [GBY_302]
-                                                          <-Reducer 13 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                            
PARTITION_ONLY_SHUFFLE [RS_330]
-                                                              Select Operator 
[SEL_329] (rows=1 width=8)
-                                                                Filter 
Operator [FIL_328] (rows=1 width=8)
-                                                                  
predicate:(sq_count_check(_col0) <= 1)
-                                                                  Group By 
Operator [GBY_327] (rows=1 width=8)
-                                                                    
Output:["_col0"],aggregations:["count(VALUE._col0)"]
-                                                                  <-Reducer 12 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                                    
PARTITION_ONLY_SHUFFLE [RS_326]
-                                                                      Group By 
Operator [GBY_325] (rows=1 width=8)
-                                                                        
Output:["_col0"],aggregations:["count()"]
-                                                                        Select 
Operator [SEL_323] (rows=25 width=4)
-                                                                           
Please refer to the previous Group By Operator [GBY_322]
+                                                                               
   <-Reducer 26 [BROADCAST_EDGE] vectorized
+                                                                               
     BROADCAST [RS_356]
+                                                                               
        Please refer to the previous Group By Operator [GBY_354]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
index d97f9df..f008099 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out
@@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 1 <- Reducer 12 (BROADCAST_EDGE)
-Reducer 10 <- Union 9 (SIMPLE_EDGE)
-Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE)
+Map 1 <- Reducer 7 (BROADCAST_EDGE)
+Reducer 11 <- Union 10 (SIMPLE_EDGE)
+Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
 Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
 Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
-Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS)
+Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS)
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS)
+Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS)
 
 Stage-0
   Fetch Operator
     limit:100
     Stage-1
       Reducer 5 vectorized
-      File Output Operator [FS_146]
-        Limit [LIM_145] (rows=1 width=200)
+      File Output Operator [FS_149]
+        Limit [LIM_148] (rows=1 width=200)
           Number of rows:100
-          Select Operator [SEL_144] (rows=1 width=200)
+          Select Operator [SEL_147] (rows=1 width=200)
             Output:["_col0","_col1"]
           <-Reducer 4 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_143]
-              Group By Operator [GBY_142] (rows=1 width=200)
+            SHUFFLE [RS_146]
+              Group By Operator [GBY_145] (rows=1 width=200)
                 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
               <-Reducer 3 [SIMPLE_EDGE]
                 SHUFFLE [RS_57]
@@ -263,47 +263,47 @@ Stage-0
                       keys:_col6,sort order:+,top n:100
                       Merge Join Operator [MERGEJOIN_118] (rows=1 width=200)
                         
Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"]
-                      <-Reducer 11 [SIMPLE_EDGE]
+                      <-Reducer 12 [SIMPLE_EDGE]
                         SHUFFLE [RS_53]
                           PartitionCols:_col1
                           Merge Join Operator [MERGEJOIN_117] (rows=1 width=92)
-                            
Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"]
+                            
Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"]
                           <-Map 18 [SIMPLE_EDGE] vectorized
-                            SHUFFLE [RS_133]
+                            SHUFFLE [RS_144]
                               PartitionCols:_col2
-                              Select Operator [SEL_132] (rows=1704 width=276)
+                              Select Operator [SEL_143] (rows=1704 width=276)
                                 Output:["_col0","_col1","_col2"]
-                                Filter Operator [FIL_131] (rows=1704 width=181)
+                                Filter Operator [FIL_142] (rows=1704 width=181)
                                   predicate:substr(s_zip, 1, 2) is not null
                                   TableScan [TS_42] (rows=1704 width=181)
                                     
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"]
-                          <-Reducer 10 [SIMPLE_EDGE] vectorized
-                            SHUFFLE [RS_130]
+                          <-Reducer 11 [SIMPLE_EDGE] vectorized
+                            SHUFFLE [RS_141]
                               PartitionCols:_col0
-                              Select Operator [SEL_129] (rows=1 width=184)
+                              Select Operator [SEL_140] (rows=1 width=184)
                                 Output:["_col0"]
-                                Filter Operator [FIL_128] (rows=1 width=192)
+                                Filter Operator [FIL_139] (rows=1 width=192)
                                   predicate:(_col1 = 2L)
-                                  Group By Operator [GBY_127] (rows=3098 
width=192)
+                                  Group By Operator [GBY_138] (rows=3098 
width=192)
                                     
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-                                  <-Union 9 [SIMPLE_EDGE]
+                                  <-Union 10 [SIMPLE_EDGE]
                                     <-Reducer 16 [CONTAINS] vectorized
-                                      Reduce Output Operator [RS_167]
+                                      Reduce Output Operator [RS_170]
                                         PartitionCols:_col0
-                                        Group By Operator [GBY_166] (rows=3098 
width=192)
+                                        Group By Operator [GBY_169] (rows=3098 
width=192)
                                           
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
-                                          Group By Operator [GBY_165] 
(rows=1126 width=192)
+                                          Group By Operator [GBY_168] 
(rows=1126 width=192)
                                             
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                           <-Reducer 15 [SIMPLE_EDGE] vectorized
-                                            SHUFFLE [RS_164]
+                                            SHUFFLE [RS_167]
                                               PartitionCols:_col0
-                                              Group By Operator [GBY_163] 
(rows=1126 width=192)
+                                              Group By Operator [GBY_166] 
(rows=1126 width=192)
                                                 
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                                                Select Operator [SEL_162] 
(rows=2253 width=97)
+                                                Select Operator [SEL_165] 
(rows=2253 width=97)
                                                   Output:["_col0"]
-                                                  Filter Operator [FIL_161] 
(rows=2253 width=97)
+                                                  Filter Operator [FIL_164] 
(rows=2253 width=97)
                                                     predicate:(_col1 > 10L)
-                                                    Group By Operator 
[GBY_160] (rows=6761 width=97)
+                                                    Group By Operator 
[GBY_163] (rows=6761 width=97)
                                                       
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                                                     <-Reducer 14 [SIMPLE_EDGE]
                                                       SHUFFLE [RS_25]
@@ -311,40 +311,40 @@ Stage-0
                                                         Group By Operator 
[GBY_24] (rows=67610 width=97)
                                                           
Output:["_col0","_col1"],aggregations:["count()"],keys:_col1
                                                           Merge Join Operator 
[MERGEJOIN_116] (rows=26666667 width=89)
-                                                            
Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"]
+                                                            
Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"]
                                                           <-Map 13 
[SIMPLE_EDGE] vectorized
-                                                            SHUFFLE [RS_156]
+                                                            SHUFFLE [RS_159]
                                                               
PartitionCols:_col0
-                                                              Select Operator 
[SEL_155] (rows=40000000 width=93)
+                                                              Select Operator 
[SEL_158] (rows=40000000 width=93)
                                                                 
Output:["_col0","_col1"]
-                                                                Filter 
Operator [FIL_154] (rows=40000000 width=93)
+                                                                Filter 
Operator [FIL_157] (rows=40000000 width=93)
                                                                   
predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null
                                                                   TableScan 
[TS_14] (rows=40000000 width=93)
                                                                     
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"]
                                                           <-Map 17 
[SIMPLE_EDGE] vectorized
-                                                            SHUFFLE [RS_159]
+                                                            SHUFFLE [RS_162]
                                                               
PartitionCols:_col0
-                                                              Select Operator 
[SEL_158] (rows=26666667 width=4)
+                                                              Select Operator 
[SEL_161] (rows=26666667 width=4)
                                                                 
Output:["_col0"]
-                                                                Filter 
Operator [FIL_157] (rows=26666667 width=89)
+                                                                Filter 
Operator [FIL_160] (rows=26666667 width=89)
                                                                   
predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null)
                                                                   TableScan 
[TS_17] (rows=80000000 width=89)
                                                                     
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"]
-                                    <-Reducer 8 [CONTAINS] vectorized
-                                      Reduce Output Operator [RS_153]
+                                    <-Reducer 9 [CONTAINS] vectorized
+                                      Reduce Output Operator [RS_156]
                                         PartitionCols:_col0
-                                        Group By Operator [GBY_152] (rows=3098 
width=192)
+                                        Group By Operator [GBY_155] (rows=3098 
width=192)
                                           
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0
-                                          Group By Operator [GBY_151] 
(rows=5071 width=192)
+                                          Group By Operator [GBY_154] 
(rows=5071 width=192)
                                             
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-                                          <-Map 7 [SIMPLE_EDGE] vectorized
-                                            SHUFFLE [RS_150]
+                                          <-Map 8 [SIMPLE_EDGE] vectorized
+                                            SHUFFLE [RS_153]
                                               PartitionCols:_col0
-                                              Group By Operator [GBY_149] 
(rows=70994 width=192)
+                                              Group By Operator [GBY_152] 
(rows=70994 width=192)
                                                 
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                                                Select Operator [SEL_148] 
(rows=20000000 width=89)
+                                                Select Operator [SEL_151] 
(rows=20000000 width=89)
                                                   Output:["_col0"]
-                                                  Filter Operator [FIL_147] 
(rows=20000000 width=89)
+                                                  Filter Operator [FIL_150] 
(rows=20000000 width=89)
                                                     predicate:((substr(ca_zip, 
1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', 
'40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', 
'18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', 
'67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', 
'90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', 
'10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', 
'26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', 
'99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', 
'14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', 
'54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', 
'25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', 
'29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', 
'48649', '39050', '41793', '25002', '27413', '39736'
 , '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', 
'21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', 
'51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', 
'41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', 
'49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', 
'50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', 
'23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', 
'56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', 
'28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', 
'35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', 
'11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', 
'14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', 
'11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', 
'38936', '13036', '88376', '45539', '35901', '19506', '6569
 0', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', 
'38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', 
'76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', 
'15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', 
'24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', 
'59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', 
'71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', 
'49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', 
'54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', 
'59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', 
'52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', 
'92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', 
'30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', 
'93578', '83583', '46047', '94167', '82564', '21156', '15
 799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', 
'91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', 
'49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', 
'36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', 
'26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', 
'11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', 
'61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', 
'99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', 
'14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', 
'21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 
1, 2) is not null)
                                                     TableScan [TS_6] 
(rows=40000000 width=89)
                                                       
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"]
@@ -352,34 +352,34 @@ Stage-0
                         SHUFFLE [RS_52]
                           PartitionCols:_col1
                           Merge Join Operator [MERGEJOIN_115] (rows=37399754 
width=42)
-                            
Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"]
+                            
Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"]
+                          <-Map 6 [SIMPLE_EDGE] vectorized
+                            PARTITION_ONLY_SHUFFLE [RS_129]
+                              PartitionCols:_col0
+                              Select Operator [SEL_128] (rows=130 width=4)
+                                Output:["_col0"]
+                                Filter Operator [FIL_127] (rows=130 width=12)
+                                  predicate:((d_qoy = 1) and (d_year = 2002))
+                                  TableScan [TS_3] (rows=73049 width=12)
+                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
                           <-Map 1 [SIMPLE_EDGE] vectorized
-                            SHUFFLE [RS_138]
+                            SHUFFLE [RS_137]
                               PartitionCols:_col0
-                              Select Operator [SEL_137] (rows=525329897 
width=114)
+                              Select Operator [SEL_136] (rows=525329897 
width=114)
                                 Output:["_col0","_col1","_col2"]
-                                Filter Operator [FIL_136] (rows=525329897 
width=114)
-                                  predicate:((ss_store_sk BETWEEN 
DynamicValue(RS_53_store_s_store_sk_min) AND 
DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, 
DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not 
null and ss_store_sk is not null)
+                                Filter Operator [FIL_135] (rows=525329897 
width=114)
+                                  predicate:((ss_sold_date_sk BETWEEN 
DynamicValue(RS_50_date_dim_d_date_sk_min) AND 
DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is 
not null and ss_store_sk is not null)
                                   TableScan [TS_0] (rows=575995635 width=114)
                                     
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"]
-                                  <-Reducer 12 [BROADCAST_EDGE] vectorized
-                                    BROADCAST [RS_135]
-                                      Group By Operator [GBY_134] (rows=1 
width=12)
+                                  <-Reducer 7 [BROADCAST_EDGE] vectorized
+                                    BROADCAST [RS_134]
+                                      Group By Operator [GBY_133] (rows=1 
width=12)
                                         
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                      <-Reducer 11 [CUSTOM_SIMPLE_EDGE]
-                                        SHUFFLE [RS_92]
-                                          Group By Operator [GBY_91] (rows=1 
width=12)
+                                      <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized
+                                        PARTITION_ONLY_SHUFFLE [RS_132]
+                                          Group By Operator [GBY_131] (rows=1 
width=12)
                                             
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                            Select Operator [SEL_90] (rows=1 
width=8)
+                                            Select Operator [SEL_130] 
(rows=130 width=4)
                                               Output:["_col0"]
-                                               Please refer to the previous 
Merge Join Operator [MERGEJOIN_117]
-                          <-Map 6 [SIMPLE_EDGE] vectorized
-                            SHUFFLE [RS_141]
-                              PartitionCols:_col0
-                              Select Operator [SEL_140] (rows=130 width=4)
-                                Output:["_col0"]
-                                Filter Operator [FIL_139] (rows=130 width=12)
-                                  predicate:((d_qoy = 1) and (d_year = 2002))
-                                  TableScan [TS_3] (rows=73049 width=12)
-                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]
+                                               Please refer to the previous 
Select Operator [SEL_128]
 

Reply via email to