(hive) branch master updated: HIVE-28488: Merge adjacent union distinct (Seonggon Namgung, reviewed by Denys Kuzmenko, Shohei Okumiya)

dkuzmenko Wed, 13 Nov 2024 05:39:04 -0800

This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 06206b526c3 HIVE-28488: Merge adjacent union distinct (Seonggon 
Namgung, reviewed by Denys Kuzmenko, Shohei Okumiya)
06206b526c3 is described below

commit 06206b526c32b1f60ba1f01e1abc5a59855f8fce
Author: seonggon <[email protected]>
AuthorDate: Wed Nov 13 22:38:51 2024 +0900

    HIVE-28488: Merge adjacent union distinct (Seonggon Namgung, reviewed by 
Denys Kuzmenko, Shohei Okumiya)
    
    Closes #5423
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../hive/ql/optimizer/UnionDistinctMerger.java     | 185 +++++
 .../apache/hadoop/hive/ql/parse/TezCompiler.java   |   8 +
 ql/src/test/queries/clientpositive/explainuser_2.q |   1 +
 .../test/queries/clientpositive/unionDistinct_3.q  |   1 +
 .../clientpositive/union_distinct_hive_28488.q     |  60 ++
 .../llap/union_distinct_hive_28488.q.out           | 789 +++++++++++++++++++++
 .../perf/tpcds30tb/tez/query75.q.out               | 260 +++----
 8 files changed, 1153 insertions(+), 153 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0d3f9e358ba..125d6acf072 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2655,6 +2655,8 @@ public class HiveConf extends Configuration {
         "By default, when writing data into a table and UNION ALL is the last 
step of the query, Hive on Tez will\n" +
         "create a subdirectory for each branch of the UNION ALL. When this 
property is enabled,\n" +
         "the subdirectories are removed, and the files are renamed and moved 
to the parent directory"),
+    
HIVE_OPTIMIZE_MERGE_ADJACENT_UNION_DISTINCT("hive.optimize.merge.adjacent.union.distinct",
 true,
+        "Whether to merge adjacent binary UNION DISTINCT into a single n-ary 
UNION DISTINCT."),
     HIVE_OPT_CORRELATION("hive.optimize.correlation", false, "exploit 
intra-query correlations."),
 
     HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false,
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java
new file mode 100644
index 00000000000..e42c5d060fb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
+import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
+import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
+import org.apache.hadoop.hive.ql.lib.SemanticRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+public class UnionDistinctMerger extends Transform {
+  private static final Logger LOG = 
LoggerFactory.getLogger(UnionDistinctMerger.class);
+
+  private static final String PATTERN_STRING = new StringBuilder()
+      .append(UnionOperator.getOperatorName()).append("%")
+      .append(GroupByOperator.getOperatorName()).append("%")
+      .append(ReduceSinkOperator.getOperatorName()).append("%")
+      .append(GroupByOperator.getOperatorName()).append("%")
+      .append(UnionOperator.getOperatorName()).append("%")
+      .append(GroupByOperator.getOperatorName()).append("%")
+      .append(ReduceSinkOperator.getOperatorName()).append("%")
+      .append(GroupByOperator.getOperatorName()).append("%")
+      .toString();
+
+  private static class UnionMergeContext implements NodeProcessorCtx {
+    public final ParseContext pCtx;
+
+    public UnionMergeContext(ParseContext pCtx) {
+      this.pCtx = pCtx;
+    }
+  }
+
+  private class UnionMergeProcessor implements SemanticNodeProcessor {
+    @Override
+    public Void process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      UnionMergeContext context = (UnionMergeContext) procCtx;
+
+      // The stack contains at least 8 operators, 
UNION-GBY-RS-GBY-UNION-GBY-RS-GBY.
+      // The leftmost UNION is on stack.size() - 8 and the rightmost GBY is on 
stack.size() - 1.
+      Set<Operator> allOps = new HashSet<>(context.pCtx.getAllOps());
+      for (int i = 1; i <= 8; i ++) {
+        Operator<?> op = (Operator<?>) stack.get(stack.size() - i);
+
+        // We do not apply the optimization if some operators do not belong to 
query plan.
+        // This can happen when we have already merged some UNIONs before.
+        // For example, suppose that a query plan looks like the graph below:
+        //   (1)UNION-GBY-RS-GBY-(3)UNION-GBY-RS-GBY
+        //   (2)UNION-GBY-RS-GBY-
+        // Then we merge (1)-(3) to (1) and then move on to merging (2)-(3). 
Without checking the presence of
+        // operators in (2) and (3), the merge process will fail as (3) is 
already removed.
+        if (!allOps.contains(op)) {
+          return null;
+        }
+
+        // We do not apply the optimization if intermediate outputs are used 
by other operators.
+        if (i != 1 && op.getChildOperators().size() > 1) {
+          return null;
+        }
+      }
+
+      UnionOperator upperUnionOperator = (UnionOperator) 
stack.get(stack.size() - 8);
+      GroupByOperator upperFinalGroupByOperator = (GroupByOperator) 
stack.get(stack.size() - 5);
+
+      UnionOperator lowerUnionOperator = (UnionOperator) 
stack.get(stack.size() - 4);
+      GroupByOperator lowerFinalGroupByOperator = (GroupByOperator) 
stack.get(stack.size() - 1);
+
+      // We can apply the optimization if there are no aggregators in the final 
GroupBy operators. The absence of
+      // aggregators ensures that we are merging two distinct computations.
+      if (upperFinalGroupByOperator.getConf().getAggregators().isEmpty() &&
+          lowerFinalGroupByOperator.getConf().getAggregators().isEmpty()) {
+        LOG.info("Detect duplicate UNION-DISTINCT GBY patterns. Remove the 
latter one.");
+
+        // Step 0. UNION1->GBY1->RS1->GBY2->UNION2->GBY3->RS2->GBY4
+
+        // Step 1. Cut GBY2->UNION2
+        lowerUnionOperator.removeParent(upperFinalGroupByOperator);
+
+        // Step 2.
+        //   Connect the parent of lowerUnionOperator and upperUnionOperator.
+        //   Disconnect lowerUnionOperator from operator graph.
+        // Before step 2:
+        //    {OP1, 2}-UNION1->GBY1->RS1->GBY2-{}
+        //    {OP3, 4}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+        // After step 2:
+        //    {OP1, 2, 3, 4}-UNION1->GBY1->RS1->GBY2-{}
+        //                {}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+        for (Operator<?> lowerUnionParent: 
lowerUnionOperator.getParentOperators()) {
+          lowerUnionParent.replaceChild(lowerUnionOperator, 
upperUnionOperator);
+          upperUnionOperator.getParentOperators().add(lowerUnionParent);
+        }
+        lowerUnionOperator.setParentOperators(new ArrayList<>());
+
+        // Step 3.
+        //   Connect upperFinalGroupByOperator and the children of 
lowerFinalGroupByOperator.
+        //   Disconnect lowerFinalGroupByOperator from operator graph.
+        // Before step 3:
+        //    {OP1, 2, ...}-UNION1->GBY1->RS1->GBY2-{}
+        //               {}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+        // After step 3:
+        //    {OP1, 2, ...}-UNION1->GBY1->RS1->GBY2-{OP5, 6, ...}
+        //               {}-UNION2->GBY3->RS2->GBY4-{}
+        for (Operator<?> lowerFinalGroupByChild: 
lowerFinalGroupByOperator.getChildOperators()) {
+          lowerFinalGroupByChild.replaceParent(lowerFinalGroupByOperator, 
upperFinalGroupByOperator);
+          
upperFinalGroupByOperator.getChildOperators().add(lowerFinalGroupByChild);
+        }
+        
upperUnionOperator.getConf().setNumInputs(upperUnionOperator.getNumParent());
+      }
+
+      return null;
+    }
+  }
+
+  private static class NoSkipGraphWalker extends DefaultGraphWalker {
+    public NoSkipGraphWalker(SemanticDispatcher disp) {
+      super(disp);
+    }
+
+    @Override
+    public void startWalking(Collection<Node> startNodes,
+        HashMap<Node, Object> nodeOutput) throws SemanticException {
+      toWalk.addAll(startNodes);
+      while (!toWalk.isEmpty()) {
+        Node nd = toWalk.remove(0);
+        walk(nd);
+        // We need to revisit GroupBy operator for every distinct operator 
path.
+        // GraphWalker uses retMap to determine if an operator has been 
visited.
+        // Clearing it after each walk() ensures that we visit GroupBy 
operator in every possible path.
+        retMap.clear();
+      }
+    }
+  }
+
+  public ParseContext transform(ParseContext pCtx) throws SemanticException {
+    Map<SemanticRule, SemanticNodeProcessor> testRules = new LinkedHashMap<>();
+    testRules.put(new RuleRegExp("AdjacentDistinctUnion", PATTERN_STRING), new 
UnionMergeProcessor());
+    SemanticDispatcher disp = new DefaultRuleDispatcher(null, testRules, new 
UnionMergeContext(pCtx));
+    SemanticGraphWalker ogw = new NoSkipGraphWalker(disp);
+
+    List<Node> topNodes = new ArrayList<>();
+    topNodes.addAll(pCtx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    return pCtx;
+  }
+}
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 6c17e987889..6a32c84b9c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -119,6 +119,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
 import 
org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
+import org.apache.hadoop.hive.ql.optimizer.UnionDistinctMerger;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
@@ -213,6 +214,13 @@ public class TezCompiler extends TaskCompiler {
       perfLogger.perfLogEnd(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
     }
 
+    perfLogger.perfLogBegin(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER);
+    if 
(procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_MERGE_ADJACENT_UNION_DISTINCT)) 
{
+      // This should be run before ReduceSinkDeDuplication in order not to 
merge irrelevant GroupBy operators.
+      new UnionDistinctMerger().transform(procCtx.parseContext);
+    }
+    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, 
"Run adjacent union distinct merger");
+
     if(HiveConf.getBoolVar(procCtx.conf, 
HiveConf.ConfVars.HIVE_OPT_REDUCE_DEDUPLICATION)) {
       perfLogger.perfLogBegin(this.getClass().getName(), 
PerfLogger.TEZ_COMPILER);
       // Dynamic sort partition adds an extra RS therefore need to de-dup
diff --git a/ql/src/test/queries/clientpositive/explainuser_2.q 
b/ql/src/test/queries/clientpositive/explainuser_2.q
index 1f905c594aa..6d1e2b3d244 100644
--- a/ql/src/test/queries/clientpositive/explainuser_2.q
+++ b/ql/src/test/queries/clientpositive/explainuser_2.q
@@ -6,6 +6,7 @@ set hive.strict.checks.bucketing=false;
 set hive.explain.user=true;
 set hive.metastore.aggregate.stats.cache.enabled=false;
 set hive.cbo.fallback.strategy=NEVER;
+set hive.optimize.merge.adjacent.union.distinct=false;
 
 -- SORT_QUERY_RESULTS
 
diff --git a/ql/src/test/queries/clientpositive/unionDistinct_3.q 
b/ql/src/test/queries/clientpositive/unionDistinct_3.q
index 25828286776..1d9e74db7da 100644
--- a/ql/src/test/queries/clientpositive/unionDistinct_3.q
+++ b/ql/src/test/queries/clientpositive/unionDistinct_3.q
@@ -3,6 +3,7 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.optimize.merge.adjacent.union.distinct=false;
 
 -- union2.q
 
diff --git a/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q 
b/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q
new file mode 100644
index 00000000000..47dc589c9dd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q
@@ -0,0 +1,60 @@
+set hive.optimize.shared.work=false;
+
+create table union_test (key string, value string);
+
+set hive.optimize.merge.adjacent.union.distinct=false;
+explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d;
+
+set hive.optimize.merge.adjacent.union.distinct=true;
+explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d;
+
diff --git 
a/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out 
b/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out
new file mode 100644
index 00000000000..fbd7e973ffa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out
@@ -0,0 +1,789 @@
+PREHOOK: query: create table union_test (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_test
+POSTHOOK: query: create table union_test (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_test
+PREHOOK: query: explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_test
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 10 <- Union 2 (CONTAINS)
+        Map 11 <- Union 4 (CONTAINS)
+        Map 12 <- Union 13 (CONTAINS)
+        Map 17 <- Union 13 (CONTAINS)
+        Map 18 <- Union 15 (CONTAINS)
+        Map 19 <- Union 20 (CONTAINS)
+        Map 24 <- Union 20 (CONTAINS)
+        Map 25 <- Union 22 (CONTAINS)
+        Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS)
+        Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 6 (CONTAINS)
+        Reducer 21 <- Union 20 (SIMPLE_EDGE), Union 22 (CONTAINS)
+        Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 8 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
+        Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS)
+        Reducer 9 <- Union 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 11 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 12 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 17 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 18 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 19 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 24 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 25 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 14 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 16 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 21 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 23 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 7 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string), _col1 (type: string)
+                  minReductionHashAggr: 0.99
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    null sort order: zz
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                    Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 9 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 13 
+            Vertex: Union 13
+        Union 15 
+            Vertex: Union 15
+        Union 2 
+            Vertex: Union 2
+        Union 20 
+            Vertex: Union 20
+        Union 22 
+            Vertex: Union 22
+        Union 4 
+            Vertex: Union 4
+        Union 6 
+            Vertex: Union 6
+        Union 8 
+            Vertex: Union 8
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_test
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from (
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d1
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d2
+  union
+  select * from (
+    select * from union_test
+    union
+    select * from union_test
+    union
+    select * from union_test
+  ) d3
+) d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 10 <- Union 2 (CONTAINS)
+        Map 11 <- Union 2 (CONTAINS)
+        Map 4 <- Union 2 (CONTAINS)
+        Map 5 <- Union 2 (CONTAINS)
+        Map 6 <- Union 2 (CONTAINS)
+        Map 7 <- Union 2 (CONTAINS)
+        Map 8 <- Union 2 (CONTAINS)
+        Map 9 <- Union 2 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 11 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: union_test
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string)
+                      minReductionHashAggr: 0.99
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
index 8422dee3b3a..d8a87abb66e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
@@ -7,36 +7,34 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 25 <- Reducer 12 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE)
-        Map 26 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 22 
(BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
-        Map 31 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 23 
(BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
-        Map 36 <- Reducer 33 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE)
-        Map 5 <- Reducer 28 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE)
-        Map 6 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 2 
(BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE)
-        Reducer 11 <- Union 10 (SIMPLE_EDGE)
-        Reducer 12 <- Map 6 (CUSTOM_SIMPLE_EDGE)
-        Reducer 13 <- Map 25 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE), 
Union 14 (CONTAINS)
-        Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS)
-        Reducer 17 <- Union 16 (SIMPLE_EDGE)
-        Reducer 18 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 17 
(CUSTOM_SIMPLE_EDGE)
-        Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+        Map 21 <- Reducer 10 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE)
+        Map 22 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 18 
(BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+        Map 27 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 19 
(BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
+        Map 32 <- Reducer 29 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE)
+        Map 5 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE)
+        Map 6 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 2 
(BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE)
+        Reducer 10 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 11 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE), 
Union 12 (CONTAINS)
+        Reducer 13 <- Union 12 (SIMPLE_EDGE)
+        Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 9 
(CUSTOM_SIMPLE_EDGE)
+        Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
+        Reducer 16 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 18 <- Map 17 (SIMPLE_EDGE)
+        Reducer 19 <- Map 17 (SIMPLE_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 20 <- Map 6 (CUSTOM_SIMPLE_EDGE)
-        Reducer 22 <- Map 21 (SIMPLE_EDGE)
-        Reducer 23 <- Map 21 (SIMPLE_EDGE)
-        Reducer 24 <- Map 21 (SIMPLE_EDGE)
-        Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), 
Union 8 (CONTAINS)
-        Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE)
-        Reducer 29 <- Map 26 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), 
Union 14 (CONTAINS)
+        Reducer 20 <- Map 17 (SIMPLE_EDGE)
+        Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), 
Union 8 (CONTAINS)
+        Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE)
+        Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), 
Union 12 (CONTAINS)
+        Reducer 26 <- Map 22 (CUSTOM_SIMPLE_EDGE)
+        Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE), Map 32 
(CUSTOM_SIMPLE_EDGE), Union 8 (CONTAINS)
+        Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (SIMPLE_EDGE)
-        Reducer 30 <- Map 26 (CUSTOM_SIMPLE_EDGE)
-        Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE), Map 36 
(CUSTOM_SIMPLE_EDGE), Union 10 (CONTAINS)
-        Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE)
-        Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE), Map 36 
(CUSTOM_SIMPLE_EDGE), Union 16 (CONTAINS)
-        Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE)
+        Reducer 30 <- Map 27 (CUSTOM_SIMPLE_EDGE), Map 32 
(CUSTOM_SIMPLE_EDGE), Union 12 (CONTAINS)
+        Reducer 31 <- Map 27 (CUSTOM_SIMPLE_EDGE)
         Reducer 4 <- Map 1 (SIMPLE_EDGE)
-        Reducer 7 <- Map 25 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE), 
Union 8 (CONTAINS)
-        Reducer 9 <- Union 10 (CONTAINS), Union 8 (SIMPLE_EDGE)
+        Reducer 7 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE), 
Union 8 (CONTAINS)
+        Reducer 9 <- Union 8 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -96,7 +94,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 21 
+        Map 17 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -136,13 +134,13 @@ STAGE PLANS:
                             Target Input: store_sales
                             Partition key expr: ss_sold_date_sk
                             Statistics: Num rows: 367 Data size: 2936 Basic 
stats: COMPLETE Column stats: COMPLETE
-                            Target Vertex: Map 26
+                            Target Vertex: Map 22
                           Dynamic Partitioning Event Operator
                             Target column: ws_sold_date_sk (bigint)
                             Target Input: web_sales
                             Partition key expr: ws_sold_date_sk
                             Statistics: Num rows: 367 Data size: 2936 Basic 
stats: COMPLETE Column stats: COMPLETE
-                            Target Vertex: Map 31
+                            Target Vertex: Map 27
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint)
                         null sort order: z
@@ -189,13 +187,13 @@ STAGE PLANS:
                             Target Input: web_sales
                             Partition key expr: ws_sold_date_sk
                             Statistics: Num rows: 367 Data size: 2936 Basic 
stats: COMPLETE Column stats: COMPLETE
-                            Target Vertex: Map 31
+                            Target Vertex: Map 27
                           Dynamic Partitioning Event Operator
                             Target column: ss_sold_date_sk (bigint)
                             Target Input: store_sales
                             Partition key expr: ss_sold_date_sk
                             Statistics: Num rows: 367 Data size: 2936 Basic 
stats: COMPLETE Column stats: COMPLETE
-                            Target Vertex: Map 26
+                            Target Vertex: Map 22
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint)
                         null sort order: z
@@ -210,7 +208,7 @@ STAGE PLANS:
                         Statistics: Num rows: 367 Data size: 2936 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 25 
+        Map 21 
             Map Operator Tree:
                 TableScan
                   alias: catalog_returns
@@ -246,7 +244,7 @@ STAGE PLANS:
                         value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 26 
+        Map 22 
             Map Operator Tree:
                 TableScan
                   alias: store_sales
@@ -263,7 +261,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 22
+                        1 Reducer 18
                       Statistics: Num rows: 16583283491 Data size: 
1963216325036 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -305,7 +303,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Map 21
+                        1 Map 17
                       Statistics: Num rows: 16583283491 Data size: 
1963216325036 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -341,7 +339,7 @@ STAGE PLANS:
                               value expressions: _col0 (type: bigint), _col1 
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 31 
+        Map 27 
             Map Operator Tree:
                 TableScan
                   alias: web_sales
@@ -358,7 +356,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 23
+                        1 Reducer 19
                       Statistics: Num rows: 4340155038 Data size: 572587087908 
Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -400,7 +398,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Map 21
+                        1 Map 17
                       Statistics: Num rows: 4340155038 Data size: 572587087908 
Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -436,7 +434,7 @@ STAGE PLANS:
                               value expressions: _col0 (type: bigint), _col1 
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 36 
+        Map 32 
             Map Operator Tree:
                 TableScan
                   alias: web_returns
@@ -525,7 +523,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Map 21
+                        1 Map 17
                       Statistics: Num rows: 8582195972 Data size: 
1120372034864 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -567,7 +565,7 @@ STAGE PLANS:
                         1 _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 24
+                        1 Reducer 20
                       Statistics: Num rows: 8582195972 Data size: 
1120372034864 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
@@ -603,28 +601,7 @@ STAGE PLANS:
                               value expressions: _col0 (type: bigint), _col1 
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Reducer 11 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: 
decimal(8,2))
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 7061070159 Data size: 932061249496 Basic 
stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: sum(_col4), sum(_col5)
-                  keys: _col0 (type: int), _col1 (type: int), _col2 (type: 
int), _col3 (type: int)
-                  mode: complete
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 177920028 Data size: 24197123536 Basic 
stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int)
-                    null sort order: zzzz
-                    sort order: ++++
-                    Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
-                    Statistics: Num rows: 177920028 Data size: 24197123536 
Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col4 (type: bigint), _col5 (type: 
decimal(18,2))
-        Reducer 12 
+        Reducer 10 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -637,7 +614,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
-        Reducer 13 
+        Reducer 11 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -648,7 +625,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 
(type: bigint)
                 outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, 
_col13, _col14
                 input vertices:
-                  1 Map 25
+                  1 Map 21
                 Statistics: Num rows: 2017213214 Data size: 388166715564 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
@@ -665,9 +642,9 @@ STAGE PLANS:
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 
(type: decimal(8,2))
+                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
                       Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 15 
+        Reducer 13 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -675,26 +652,6 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 6029744178 Data size: 795926226580 Basic 
stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  keys: _col0 (type: int), _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2))
-                  minReductionHashAggr: 0.4
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
-                    null sort order: zzzzzz
-                    sort order: ++++++
-                    Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
-                    Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 17 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: 
decimal(8,2))
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 7061070159 Data size: 932061249496 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col4), sum(_col5)
                   keys: _col0 (type: int), _col1 (type: int), _col2 (type: 
int), _col3 (type: int)
@@ -708,7 +665,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
                     Statistics: Num rows: 177920028 Data size: 24197123536 
Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col4 (type: bigint), _col5 (type: 
decimal(18,2))
-        Reducer 18 
+        Reducer 14 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -719,7 +676,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: 
int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col10, _col11
                 input vertices:
-                  1 Reducer 11
+                  1 Reducer 9
                 Statistics: Num rows: 32072478585127 Data size: 
8210554517792240 Basic stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Filter Operator
@@ -741,7 +698,7 @@ STAGE PLANS:
                         sort order: +
                         Statistics: Num rows: 10690826195042 Data size: 
1625005581646208 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: 
int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: 
bigint), _col7 (type: decimal(19,2))
-        Reducer 19 
+        Reducer 15 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -762,20 +719,7 @@ STAGE PLANS:
                           input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                           serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Reduce Output Operator
-                  key expressions: _col0 (type: bigint)
-                  null sort order: z
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: bigint)
-                  Statistics: Num rows: 41585 Data size: 996412 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
-        Reducer 20 
+        Reducer 16 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -788,7 +732,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
-        Reducer 22 
+        Reducer 18 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -800,7 +744,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: bigint)
                   Statistics: Num rows: 367 Data size: 2936 Basic stats: 
COMPLETE Column stats: COMPLETE
-        Reducer 23 
+        Reducer 19 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -812,7 +756,20 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: bigint)
                   Statistics: Num rows: 367 Data size: 2936 Basic stats: 
COMPLETE Column stats: COMPLETE
-        Reducer 24 
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  null sort order: z
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: bigint)
+                  Statistics: Num rows: 41585 Data size: 996412 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
+        Reducer 20 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -824,7 +781,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: bigint)
                   Statistics: Num rows: 367 Data size: 2936 Basic stats: 
COMPLETE Column stats: COMPLETE
-        Reducer 27 
+        Reducer 23 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -852,9 +809,9 @@ STAGE PLANS:
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 
(type: decimal(8,2))
+                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
                       Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 28 
+        Reducer 24 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -867,7 +824,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
-        Reducer 29 
+        Reducer 25 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -895,22 +852,9 @@ STAGE PLANS:
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 
(type: decimal(8,2))
+                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
                       Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 3 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Reduce Output Operator
-                  key expressions: _col0 (type: bigint)
-                  null sort order: z
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: bigint)
-                  Statistics: Num rows: 41585 Data size: 996412 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
-        Reducer 30 
+        Reducer 26 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -923,7 +867,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
-        Reducer 32 
+        Reducer 28 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -934,7 +878,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 
(type: bigint)
                 outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, 
_col13, _col14
                 input vertices:
-                  1 Map 36
+                  1 Map 32
                 Statistics: Num rows: 1031325981 Data size: 198862953200 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
@@ -946,14 +890,14 @@ STAGE PLANS:
                     minReductionHashAggr: 0.4
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
                       Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
-                      Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 33 
+                      Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 29 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -966,7 +910,20 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
-        Reducer 34 
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  null sort order: z
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: bigint)
+                  Statistics: Num rows: 41585 Data size: 996412 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
+        Reducer 30 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
@@ -977,7 +934,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 
(type: bigint)
                 outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, 
_col13, _col14
                 input vertices:
-                  1 Map 36
+                  1 Map 32
                 Statistics: Num rows: 1031325981 Data size: 198862953200 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
@@ -989,14 +946,14 @@ STAGE PLANS:
                     minReductionHashAggr: 0.4
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                    Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
                       Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
-                      Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 35 
+                      Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 31 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -1033,7 +990,7 @@ STAGE PLANS:
                   1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 
(type: bigint)
                 outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10, 
_col13, _col14
                 input vertices:
-                  1 Map 25
+                  1 Map 21
                 Statistics: Num rows: 2017213214 Data size: 388166715564 Basic 
stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Select Operator
@@ -1050,7 +1007,7 @@ STAGE PLANS:
                       key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
                       null sort order: zzzzzz
                       sort order: ++++++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 
(type: decimal(8,2))
+                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
                       Statistics: Num rows: 6029744178 Data size: 795926226580 
Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 9 
             Execution mode: vectorized, llap
@@ -1061,23 +1018,20 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 6029744178 Data size: 795926226580 Basic 
stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  keys: _col0 (type: int), _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2))
-                  minReductionHashAggr: 0.4
-                  mode: hash
+                  aggregations: sum(_col4), sum(_col5)
+                  keys: _col0 (type: int), _col1 (type: int), _col2 (type: 
int), _col3 (type: int)
+                  mode: complete
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 177920028 Data size: 24197123536 Basic 
stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: 
decimal(8,2))
-                    null sort order: zzzzzz
-                    sort order: ++++++
+                    key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int)
+                    null sort order: zzzz
+                    sort order: ++++
                     Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int)
-                    Statistics: Num rows: 7061070159 Data size: 932061249496 
Basic stats: COMPLETE Column stats: COMPLETE
-        Union 10 
-            Vertex: Union 10
-        Union 14 
-            Vertex: Union 14
-        Union 16 
-            Vertex: Union 16
+                    Statistics: Num rows: 177920028 Data size: 24197123536 
Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col4 (type: bigint), _col5 (type: 
decimal(18,2))
+        Union 12 
+            Vertex: Union 12
         Union 8 
             Vertex: Union 8

(hive) branch master updated: HIVE-28488: Merge adjacent union distinct (Seonggon Namgung, reviewed by Denys Kuzmenko, Shohei Okumiya)

Reply via email to