This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 06206b526c3 HIVE-28488: Merge adjacent union distinct (Seonggon
Namgung, reviewed by Denys Kuzmenko, Shohei Okumiya)
06206b526c3 is described below
commit 06206b526c32b1f60ba1f01e1abc5a59855f8fce
Author: seonggon <[email protected]>
AuthorDate: Wed Nov 13 22:38:51 2024 +0900
HIVE-28488: Merge adjacent union distinct (Seonggon Namgung, reviewed by
Denys Kuzmenko, Shohei Okumiya)
Closes #5423
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../hive/ql/optimizer/UnionDistinctMerger.java | 185 +++++
.../apache/hadoop/hive/ql/parse/TezCompiler.java | 8 +
ql/src/test/queries/clientpositive/explainuser_2.q | 1 +
.../test/queries/clientpositive/unionDistinct_3.q | 1 +
.../clientpositive/union_distinct_hive_28488.q | 60 ++
.../llap/union_distinct_hive_28488.q.out | 789 +++++++++++++++++++++
.../perf/tpcds30tb/tez/query75.q.out | 260 +++----
8 files changed, 1153 insertions(+), 153 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0d3f9e358ba..125d6acf072 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2655,6 +2655,8 @@ public class HiveConf extends Configuration {
"By default, when writing data into a table and UNION ALL is the last
step of the query, Hive on Tez will\n" +
"create a subdirectory for each branch of the UNION ALL. When this
property is enabled,\n" +
"the subdirectories are removed, and the files are renamed and moved
to the parent directory"),
+
HIVE_OPTIMIZE_MERGE_ADJACENT_UNION_DISTINCT("hive.optimize.merge.adjacent.union.distinct",
true,
+ "Whether to merge adjacent binary UNION DISTINCT into a single n-ary
UNION DISTINCT."),
HIVE_OPT_CORRELATION("hive.optimize.correlation", false, "exploit
intra-query correlations."),
HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false,
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java
new file mode 100644
index 00000000000..e42c5d060fb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/UnionDistinctMerger.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
+import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
+import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
+import org.apache.hadoop.hive.ql.lib.SemanticRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+public class UnionDistinctMerger extends Transform {
+ private static final Logger LOG =
LoggerFactory.getLogger(UnionDistinctMerger.class);
+
+ private static final String PATTERN_STRING = new StringBuilder()
+ .append(UnionOperator.getOperatorName()).append("%")
+ .append(GroupByOperator.getOperatorName()).append("%")
+ .append(ReduceSinkOperator.getOperatorName()).append("%")
+ .append(GroupByOperator.getOperatorName()).append("%")
+ .append(UnionOperator.getOperatorName()).append("%")
+ .append(GroupByOperator.getOperatorName()).append("%")
+ .append(ReduceSinkOperator.getOperatorName()).append("%")
+ .append(GroupByOperator.getOperatorName()).append("%")
+ .toString();
+
+ private static class UnionMergeContext implements NodeProcessorCtx {
+ public final ParseContext pCtx;
+
+ public UnionMergeContext(ParseContext pCtx) {
+ this.pCtx = pCtx;
+ }
+ }
+
+ private class UnionMergeProcessor implements SemanticNodeProcessor {
+ @Override
+ public Void process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ UnionMergeContext context = (UnionMergeContext) procCtx;
+
+ // The stack contains at least 8 operators,
UNION-GBY-RS-GBY-UNION-GBY-RS-GBY.
+ // The leftmost UNION is on stack.size() - 8 and the rightmost GBY is on
stack.size() - 1.
+ Set<Operator> allOps = new HashSet<>(context.pCtx.getAllOps());
+ for (int i = 1; i <= 8; i ++) {
+ Operator<?> op = (Operator<?>) stack.get(stack.size() - i);
+
+ // We do not apply the optimization if some operators do not belong to
the query plan.
+ // This can happen when we have already merged some UNIONs before.
+ // For example, suppose that a query plan looks like the graph below:
+ // (1)UNION-GBY-RS-GBY-(3)UNION-GBY-RS-GBY
+ // (2)UNION-GBY-RS-GBY-
+ // Then we merge (1)-(3) into (1) and then move on to merging (2)-(3).
Without checking the presence of
+ // operators in (2) and (3), the merge process will fail as (3) is
already removed.
+ if (!allOps.contains(op)) {
+ return null;
+ }
+
+ // We do not apply the optimization if intermediate outputs are used
by other operators.
+ if (i != 1 && op.getChildOperators().size() > 1) {
+ return null;
+ }
+ }
+
+ UnionOperator upperUnionOperator = (UnionOperator)
stack.get(stack.size() - 8);
+ GroupByOperator upperFinalGroupByOperator = (GroupByOperator)
stack.get(stack.size() - 5);
+
+ UnionOperator lowerUnionOperator = (UnionOperator)
stack.get(stack.size() - 4);
+ GroupByOperator lowerFinalGroupByOperator = (GroupByOperator)
stack.get(stack.size() - 1);
+
+ // We can apply the optimization if there are no aggregators in the final
GroupBy operators. The absence of
+ // aggregators ensures that we are merging two distinct computations.
+ if (upperFinalGroupByOperator.getConf().getAggregators().isEmpty() &&
+ lowerFinalGroupByOperator.getConf().getAggregators().isEmpty()) {
+ LOG.info("Detect duplicate UNION-DISTINCT GBY patterns. Remove the
latter one.");
+
+ // Step 0. UNION1->GBY1->RS1->GBY2->UNION2->GBY3->RS2->GBY4
+
+ // Step 1. Cut GBY2->UNION2
+ lowerUnionOperator.removeParent(upperFinalGroupByOperator);
+
+ // Step 2.
+ // Connect the parent of lowerUnionOperator and upperUnionOperator.
+ // Disconnect lowerUnionOperator from operator graph.
+ // Before step 2:
+ // {OP1, 2}-UNION1->GBY1->RS1->GBY2-{}
+ // {OP3, 4}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+ // After step 2:
+ // {OP1, 2, 3, 4}-UNION1->GBY1->RS1->GBY2-{}
+ // {}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+ for (Operator<?> lowerUnionParent:
lowerUnionOperator.getParentOperators()) {
+ lowerUnionParent.replaceChild(lowerUnionOperator,
upperUnionOperator);
+ upperUnionOperator.getParentOperators().add(lowerUnionParent);
+ }
+ lowerUnionOperator.setParentOperators(new ArrayList<>());
+
+ // Step 3.
+ // Connect upperFinalGroupByOperator and the children of
lowerFinalGroupByOperator.
+ // Disconnect lowerFinalGroupByOperator from operator graph.
+ // Before step 3:
+ // {OP1, 2, ...}-UNION1->GBY1->RS1->GBY2-{}
+ // {}-UNION2->GBY3->RS2->GBY4-{OP5, 6, ...}
+ // After step 3:
+ // {OP1, 2, ...}-UNION1->GBY1->RS1->GBY2-{OP5, 6, ...}
+ // {}-UNION2->GBY3->RS2->GBY4-{}
+ for (Operator<?> lowerFinalGroupByChild:
lowerFinalGroupByOperator.getChildOperators()) {
+ lowerFinalGroupByChild.replaceParent(lowerFinalGroupByOperator,
upperFinalGroupByOperator);
+
upperFinalGroupByOperator.getChildOperators().add(lowerFinalGroupByChild);
+ }
+
upperUnionOperator.getConf().setNumInputs(upperUnionOperator.getNumParent());
+ }
+
+ return null;
+ }
+ }
+
+ private static class NoSkipGraphWalker extends DefaultGraphWalker {
+ public NoSkipGraphWalker(SemanticDispatcher disp) {
+ super(disp);
+ }
+
+ @Override
+ public void startWalking(Collection<Node> startNodes,
+ HashMap<Node, Object> nodeOutput) throws SemanticException {
+ toWalk.addAll(startNodes);
+ while (!toWalk.isEmpty()) {
+ Node nd = toWalk.remove(0);
+ walk(nd);
+ // We need to revisit the GroupBy operator for every distinct operator
path.
+ // GraphWalker uses retMap to determine if an operator has been
visited.
+ // Clearing it after each walk() ensures that we visit the GroupBy
operator in every possible path.
+ retMap.clear();
+ }
+ }
+ }
+
+ public ParseContext transform(ParseContext pCtx) throws SemanticException {
+ Map<SemanticRule, SemanticNodeProcessor> testRules = new LinkedHashMap<>();
+ testRules.put(new RuleRegExp("AdjacentDistinctUnion", PATTERN_STRING), new
UnionMergeProcessor());
+ SemanticDispatcher disp = new DefaultRuleDispatcher(null, testRules, new
UnionMergeContext(pCtx));
+ SemanticGraphWalker ogw = new NoSkipGraphWalker(disp);
+
+ List<Node> topNodes = new ArrayList<>();
+ topNodes.addAll(pCtx.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+
+ return pCtx;
+ }
+}
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 6c17e987889..6a32c84b9c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -119,6 +119,7 @@ import
org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import
org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
+import org.apache.hadoop.hive.ql.optimizer.UnionDistinctMerger;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
import org.apache.hadoop.hive.ql.plan.BaseWork;
@@ -213,6 +214,13 @@ public class TezCompiler extends TaskCompiler {
perfLogger.perfLogEnd(this.getClass().getName(),
PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
}
+ perfLogger.perfLogBegin(this.getClass().getName(),
PerfLogger.TEZ_COMPILER);
+ if
(procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_MERGE_ADJACENT_UNION_DISTINCT))
{
+ // This should be run before ReduceSinkDeDuplication in order not to
merge irrelevant GroupBy operators.
+ new UnionDistinctMerger().transform(procCtx.parseContext);
+ }
+ perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER,
"Run adjacent union distinct merger");
+
if(HiveConf.getBoolVar(procCtx.conf,
HiveConf.ConfVars.HIVE_OPT_REDUCE_DEDUPLICATION)) {
perfLogger.perfLogBegin(this.getClass().getName(),
PerfLogger.TEZ_COMPILER);
// Dynamic sort partition adds an extra RS therefore need to de-dup
diff --git a/ql/src/test/queries/clientpositive/explainuser_2.q
b/ql/src/test/queries/clientpositive/explainuser_2.q
index 1f905c594aa..6d1e2b3d244 100644
--- a/ql/src/test/queries/clientpositive/explainuser_2.q
+++ b/ql/src/test/queries/clientpositive/explainuser_2.q
@@ -6,6 +6,7 @@ set hive.strict.checks.bucketing=false;
set hive.explain.user=true;
set hive.metastore.aggregate.stats.cache.enabled=false;
set hive.cbo.fallback.strategy=NEVER;
+set hive.optimize.merge.adjacent.union.distinct=false;
-- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/unionDistinct_3.q
b/ql/src/test/queries/clientpositive/unionDistinct_3.q
index 25828286776..1d9e74db7da 100644
--- a/ql/src/test/queries/clientpositive/unionDistinct_3.q
+++ b/ql/src/test/queries/clientpositive/unionDistinct_3.q
@@ -3,6 +3,7 @@
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
+set hive.optimize.merge.adjacent.union.distinct=false;
-- union2.q
diff --git a/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q
b/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q
new file mode 100644
index 00000000000..47dc589c9dd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_distinct_hive_28488.q
@@ -0,0 +1,60 @@
+set hive.optimize.shared.work=false;
+
+create table union_test (key string, value string);
+
+set hive.optimize.merge.adjacent.union.distinct=false;
+explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d;
+
+set hive.optimize.merge.adjacent.union.distinct=true;
+explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d;
+
diff --git
a/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out
b/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out
new file mode 100644
index 00000000000..fbd7e973ffa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/union_distinct_hive_28488.q.out
@@ -0,0 +1,789 @@
+PREHOOK: query: create table union_test (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_test
+POSTHOOK: query: create table union_test (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_test
+PREHOOK: query: explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_test
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 10 <- Union 2 (CONTAINS)
+ Map 11 <- Union 4 (CONTAINS)
+ Map 12 <- Union 13 (CONTAINS)
+ Map 17 <- Union 13 (CONTAINS)
+ Map 18 <- Union 15 (CONTAINS)
+ Map 19 <- Union 20 (CONTAINS)
+ Map 24 <- Union 20 (CONTAINS)
+ Map 25 <- Union 22 (CONTAINS)
+ Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS)
+ Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 21 <- Union 20 (SIMPLE_EDGE), Union 22 (CONTAINS)
+ Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 8 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS)
+ Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS)
+ Reducer 9 <- Union 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 17
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 19
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 24
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 25
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 14
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 16
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 21
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 23
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1
(type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 13
+ Vertex: Union 13
+ Union 15
+ Vertex: Union 15
+ Union 2
+ Vertex: Union 2
+ Union 20
+ Vertex: Union 20
+ Union 22
+ Vertex: Union 22
+ Union 4
+ Vertex: Union 4
+ Union 6
+ Vertex: Union 6
+ Union 8
+ Vertex: Union 8
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_test
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select * from (
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d1
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d2
+ union
+ select * from (
+ select * from union_test
+ union
+ select * from union_test
+ union
+ select * from union_test
+ ) d3
+) d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 10 <- Union 2 (CONTAINS)
+ Map 11 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Map 6 <- Union 2 (CONTAINS)
+ Map 7 <- Union 2 (CONTAINS)
+ Map 8 <- Union 2 (CONTAINS)
+ Map 9 <- Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: union_test
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
index 8422dee3b3a..d8a87abb66e 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query75.q.out
@@ -7,36 +7,34 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 25 <- Reducer 12 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE)
- Map 26 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 22
(BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
- Map 31 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 23
(BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
- Map 36 <- Reducer 33 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE)
- Map 5 <- Reducer 28 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE)
- Map 6 <- Map 1 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Reducer 2
(BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE)
- Reducer 11 <- Union 10 (SIMPLE_EDGE)
- Reducer 12 <- Map 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 13 <- Map 25 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE),
Union 14 (CONTAINS)
- Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS)
- Reducer 17 <- Union 16 (SIMPLE_EDGE)
- Reducer 18 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 17
(CUSTOM_SIMPLE_EDGE)
- Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+ Map 21 <- Reducer 10 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE)
+ Map 22 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 18
(BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+ Map 27 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 19
(BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
+ Map 32 <- Reducer 29 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE)
+ Map 5 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE)
+ Map 6 <- Map 1 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Reducer 2
(BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE)
+ Reducer 10 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 11 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE),
Union 12 (CONTAINS)
+ Reducer 13 <- Union 12 (SIMPLE_EDGE)
+ Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 9
(CUSTOM_SIMPLE_EDGE)
+ Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
+ Reducer 16 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 18 <- Map 17 (SIMPLE_EDGE)
+ Reducer 19 <- Map 17 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 20 <- Map 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 22 <- Map 21 (SIMPLE_EDGE)
- Reducer 23 <- Map 21 (SIMPLE_EDGE)
- Reducer 24 <- Map 21 (SIMPLE_EDGE)
- Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE),
Union 8 (CONTAINS)
- Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE)
- Reducer 29 <- Map 26 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE),
Union 14 (CONTAINS)
+ Reducer 20 <- Map 17 (SIMPLE_EDGE)
+ Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE),
Union 8 (CONTAINS)
+ Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE)
+ Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE),
Union 12 (CONTAINS)
+ Reducer 26 <- Map 22 (CUSTOM_SIMPLE_EDGE)
+ Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE), Map 32
(CUSTOM_SIMPLE_EDGE), Union 8 (CONTAINS)
+ Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Map 1 (SIMPLE_EDGE)
- Reducer 30 <- Map 26 (CUSTOM_SIMPLE_EDGE)
- Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE), Map 36
(CUSTOM_SIMPLE_EDGE), Union 10 (CONTAINS)
- Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE)
- Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE), Map 36
(CUSTOM_SIMPLE_EDGE), Union 16 (CONTAINS)
- Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE)
+ Reducer 30 <- Map 27 (CUSTOM_SIMPLE_EDGE), Map 32
(CUSTOM_SIMPLE_EDGE), Union 12 (CONTAINS)
+ Reducer 31 <- Map 27 (CUSTOM_SIMPLE_EDGE)
Reducer 4 <- Map 1 (SIMPLE_EDGE)
- Reducer 7 <- Map 25 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE),
Union 8 (CONTAINS)
- Reducer 9 <- Union 10 (CONTAINS), Union 8 (SIMPLE_EDGE)
+ Reducer 7 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE),
Union 8 (CONTAINS)
+ Reducer 9 <- Union 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -96,7 +94,7 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type:
int), _col3 (type: int), _col4 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 21
+ Map 17
Map Operator Tree:
TableScan
alias: date_dim
@@ -136,13 +134,13 @@ STAGE PLANS:
Target Input: store_sales
Partition key expr: ss_sold_date_sk
Statistics: Num rows: 367 Data size: 2936 Basic
stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 26
+ Target Vertex: Map 22
Dynamic Partitioning Event Operator
Target column: ws_sold_date_sk (bigint)
Target Input: web_sales
Partition key expr: ws_sold_date_sk
Statistics: Num rows: 367 Data size: 2936 Basic
stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 31
+ Target Vertex: Map 27
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
@@ -189,13 +187,13 @@ STAGE PLANS:
Target Input: web_sales
Partition key expr: ws_sold_date_sk
Statistics: Num rows: 367 Data size: 2936 Basic
stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 31
+ Target Vertex: Map 27
Dynamic Partitioning Event Operator
Target column: ss_sold_date_sk (bigint)
Target Input: store_sales
Partition key expr: ss_sold_date_sk
Statistics: Num rows: 367 Data size: 2936 Basic
stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 26
+ Target Vertex: Map 22
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
@@ -210,7 +208,7 @@ STAGE PLANS:
Statistics: Num rows: 367 Data size: 2936 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 25
+ Map 21
Map Operator Tree:
TableScan
alias: catalog_returns
@@ -246,7 +244,7 @@ STAGE PLANS:
value expressions: _col2 (type: int), _col3 (type:
decimal(7,2))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 26
+ Map 22
Map Operator Tree:
TableScan
alias: store_sales
@@ -263,7 +261,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 22
+ 1 Reducer 18
Statistics: Num rows: 16583283491 Data size:
1963216325036 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -305,7 +303,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 21
+ 1 Map 17
Statistics: Num rows: 16583283491 Data size:
1963216325036 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -341,7 +339,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 31
+ Map 27
Map Operator Tree:
TableScan
alias: web_sales
@@ -358,7 +356,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 23
+ 1 Reducer 19
Statistics: Num rows: 4340155038 Data size: 572587087908
Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -400,7 +398,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 21
+ 1 Map 17
Statistics: Num rows: 4340155038 Data size: 572587087908
Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -436,7 +434,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 36
+ Map 32
Map Operator Tree:
TableScan
alias: web_returns
@@ -525,7 +523,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 21
+ 1 Map 17
Statistics: Num rows: 8582195972 Data size:
1120372034864 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -567,7 +565,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 24
+ 1 Reducer 20
Statistics: Num rows: 8582195972 Data size:
1120372034864 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -603,28 +601,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Reducer 11
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2
(type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type:
decimal(8,2))
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496 Basic
stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col4), sum(_col5)
- keys: _col0 (type: int), _col1 (type: int), _col2 (type:
int), _col3 (type: int)
- mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 177920028 Data size: 24197123536 Basic
stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int)
- null sort order: zzzz
- sort order: ++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
- Statistics: Num rows: 177920028 Data size: 24197123536
Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col4 (type: bigint), _col5 (type:
decimal(18,2))
- Reducer 12
+ Reducer 10
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -637,7 +614,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
- Reducer 13
+ Reducer 11
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -648,7 +625,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1
(type: bigint)
outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10,
_col13, _col14
input vertices:
- 1 Map 25
+ 1 Map 21
Statistics: Num rows: 2017213214 Data size: 388166715564 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
@@ -665,9 +642,9 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5
(type: decimal(8,2))
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 15
+ Reducer 13
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -675,26 +652,6 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 6029744178 Data size: 795926226580 Basic
stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int), _col2 (type:
int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2))
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
- null sort order: zzzzzz
- sort order: ++++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 17
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2
(type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type:
decimal(8,2))
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col4), sum(_col5)
keys: _col0 (type: int), _col1 (type: int), _col2 (type:
int), _col3 (type: int)
@@ -708,7 +665,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 177920028 Data size: 24197123536
Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col4 (type: bigint), _col5 (type:
decimal(18,2))
- Reducer 18
+ Reducer 14
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -719,7 +676,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type:
int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col10, _col11
input vertices:
- 1 Reducer 11
+ 1 Reducer 9
Statistics: Num rows: 32072478585127 Data size:
8210554517792240 Basic stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Filter Operator
@@ -741,7 +698,7 @@ STAGE PLANS:
sort order: +
Statistics: Num rows: 10690826195042 Data size:
1625005581646208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type:
int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type:
bigint), _col7 (type: decimal(19,2))
- Reducer 19
+ Reducer 15
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -762,20 +719,7 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3
(type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 41585 Data size: 996412 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: int),
_col3 (type: int), _col4 (type: int)
- Reducer 20
+ Reducer 16
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -788,7 +732,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
- Reducer 22
+ Reducer 18
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -800,7 +744,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 367 Data size: 2936 Basic stats:
COMPLETE Column stats: COMPLETE
- Reducer 23
+ Reducer 19
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -812,7 +756,20 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 367 Data size: 2936 Basic stats:
COMPLETE Column stats: COMPLETE
- Reducer 24
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3
(type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 41585 Data size: 996412 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: int),
_col3 (type: int), _col4 (type: int)
+ Reducer 20
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -824,7 +781,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 367 Data size: 2936 Basic stats:
COMPLETE Column stats: COMPLETE
- Reducer 27
+ Reducer 23
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -852,9 +809,9 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5
(type: decimal(8,2))
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 28
+ Reducer 24
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -867,7 +824,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
- Reducer 29
+ Reducer 25
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -895,22 +852,9 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5
(type: decimal(8,2))
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3
(type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 41585 Data size: 996412 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: int),
_col3 (type: int), _col4 (type: int)
- Reducer 30
+ Reducer 26
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -923,7 +867,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
- Reducer 32
+ Reducer 28
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -934,7 +878,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1
(type: bigint)
outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10,
_col13, _col14
input vertices:
- 1 Map 36
+ 1 Map 32
Statistics: Num rows: 1031325981 Data size: 198862953200 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
@@ -946,14 +890,14 @@ STAGE PLANS:
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 33
+ Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 29
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -966,7 +910,20 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE
Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
- Reducer 34
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3
(type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 41585 Data size: 996412 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: int),
_col3 (type: int), _col4 (type: int)
+ Reducer 30
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -977,7 +934,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1
(type: bigint)
outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10,
_col13, _col14
input vertices:
- 1 Map 36
+ 1 Map 32
Statistics: Num rows: 1031325981 Data size: 198862953200 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
@@ -989,14 +946,14 @@ STAGE PLANS:
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
- Reducer 35
+ Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 31
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -1033,7 +990,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1
(type: bigint)
outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10,
_col13, _col14
input vertices:
- 1 Map 25
+ 1 Map 21
Statistics: Num rows: 2017213214 Data size: 388166715564 Basic
stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Select Operator
@@ -1050,7 +1007,7 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
null sort order: zzzzzz
sort order: ++++++
- Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5
(type: decimal(8,2))
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
Statistics: Num rows: 6029744178 Data size: 795926226580
Basic stats: COMPLETE Column stats: COMPLETE
Reducer 9
Execution mode: vectorized, llap
@@ -1061,23 +1018,20 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 6029744178 Data size: 795926226580 Basic
stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int), _col1 (type: int), _col2 (type:
int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2))
- minReductionHashAggr: 0.4
- mode: hash
+ aggregations: sum(_col4), sum(_col5)
+ keys: _col0 (type: int), _col1 (type: int), _col2 (type:
int), _col3 (type: int)
+ mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 177920028 Data size: 24197123536 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type:
decimal(8,2))
- null sort order: zzzzzz
- sort order: ++++++
+ key expressions: _col0 (type: int), _col1 (type: int),
_col2 (type: int), _col3 (type: int)
+ null sort order: zzzz
+ sort order: ++++
Map-reduce partition columns: _col0 (type: int), _col1
(type: int), _col2 (type: int), _col3 (type: int)
- Statistics: Num rows: 7061070159 Data size: 932061249496
Basic stats: COMPLETE Column stats: COMPLETE
- Union 10
- Vertex: Union 10
- Union 14
- Vertex: Union 14
- Union 16
- Vertex: Union 16
+ Statistics: Num rows: 177920028 Data size: 24197123536
Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col4 (type: bigint), _col5 (type:
decimal(18,2))
+ Union 12
+ Vertex: Union 12
Union 8
Vertex: Union 8