This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new eabb98a9a3a HIVE-28853: SharedWorkOptimizer should always consider MapJoin operators' InMemoryDataSize. (#5717)
eabb98a9a3a is described below
commit eabb98a9a3a7433abb1145e5af986afddd201b63
Author: seonggon <[email protected]>
AuthorDate: Thu Jun 5 17:39:32 2025 +0900
HIVE-28853: SharedWorkOptimizer should always consider MapJoin operators' InMemoryDataSize. (#5717)
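In short, the patch does two things: extractSharedOptimizationInfo no longer overwrites maxDataSize with the adjusted noconditionaltask size of whichever MapJoin it visited last but keeps the running maximum, and the early-return paths of extractSharedOptimizationInfoForRoot now go through a new helper, createSharedResultForRoot, which sums the InMemoryDataSize of the MapJoin operators left unmerged in both works instead of returning the still-zero accumulated sizes. A minimal sketch of the accounting, where mapJoinsInBothWorks is a hypothetical stand-in for the MapJoin operators that findWorkOperators returns for the two works:

    // Hypothetical, simplified excerpt of the accumulation this commit introduces.
    long dataSize = 0L;
    long maxDataSize = 0L;
    for (MapJoinOperator mop : mapJoinsInBothWorks) {  // stand-in collection, see above
      // Sum the in-memory hash table estimates of every unmerged MapJoin.
      dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
      // Keep the running maximum of the allowed size rather than
      // overwriting it with the value of the last MapJoin visited.
      long adjusted = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
      if (maxDataSize < adjusted) {
        maxDataSize = adjusted;
      }
    }
    // The optimizer's precondition check rejects the merge when the
    // accumulated dataSize exceeds maxDataSize, which the new qtest exercises.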
---
.../hive/ql/optimizer/SharedWorkOptimizer.java | 72 +-
.../sharedwork_mapjoin_datasize_check.q | 108 ++
.../llap/sharedwork_mapjoin_datasize_check.q.out | 1809 ++++++++++++++++++++
3 files changed, 1974 insertions(+), 15 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 67dcfebcb74..d264528387d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -1439,8 +1439,7 @@ private static boolean areSupportedDppUnionOps(ParseContext pctx, SharedWorkOpti
}
private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx,
- SharedWorkOptimizerCache optimizerCache,
- TableScanOperator retainableTsOp,
+ SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp,
TableScanOperator discardableTsOp, boolean mayRemoveDownStreamOperators, boolean mayRemoveInputOps)
throws SemanticException {
LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
@@ -1456,12 +1455,12 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
// TODO: Support checking multiple child operators to merge further.
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
- return new SharedResult(retainableOps, discardableOps, discardableInputOps,
- dataSize, maxDataSize);
+ return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
+ retainableOps, discardableOps, discardableInputOps);
}
+
if (retainableTsOp.getChildOperators().size() == 0 || discardableTsOp.getChildOperators().size() == 0) {
- return new SharedResult(retainableOps, discardableOps, discardableInputOps,
- dataSize, maxDataSize);
+ return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
}
Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
@@ -1487,14 +1486,13 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
equalOp2 = currentOp2;
retainableOps.add(equalOp1);
discardableOps.add(equalOp2);
- if (currentOp1.getChildOperators().size() > 1 ||
- currentOp2.getChildOperators().size() > 1) {
+ if (currentOp1.getNumChild() > 1 || currentOp2.getNumChild() > 1) {
// TODO: Support checking multiple child operators to merge further.
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
discardableInputOps));
- return new SharedResult(retainableOps, discardableOps, discardableInputOps,
- dataSize, maxDataSize);
+ return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
+ retainableOps, discardableOps, discardableInputOps);
}
currentOp1 = currentOp1.getChildOperators().get(0);
currentOp2 = currentOp2.getChildOperators().get(0);
@@ -1503,8 +1501,8 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
discardableInputOps));
- return new SharedResult(retainableOps, discardableOps, discardableInputOps,
- dataSize, maxDataSize);
+ return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
+ retainableOps, discardableOps, discardableInputOps);
}
}
@@ -1513,6 +1511,44 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
mayRemoveInputOps);
}
+ private static SharedResult createSharedResultForRoot(
+ SharedWorkOptimizerCache optimizerCache,
+ Operator<?> retainableOp,
+ Operator<?> discardableOp,
+ LinkedHashSet<Operator<?>> retainableOps,
+ LinkedHashSet<Operator<?>> discardableOps,
+ Set<Operator<?>> discardableInputOps) {
+ // Assertion: retainableOps and discardableOps do not contain MapJoinOperator.
+
+ // Accumulate InMemoryDataSize of unmerged MapJoin operators.
+ long dataSize = 0L;
+ long maxDataSize = 0L;
+
+ Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, retainableOp);
+ for (Operator<?> op : opsWork1) {
+ if (op instanceof MapJoinOperator) {
+ MapJoinOperator mop = (MapJoinOperator) op;
+ dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
+ if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
+ maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ }
+ }
+ }
+ Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, discardableOp);
+ for (Operator<?> op : opsWork2) {
+ if (op instanceof MapJoinOperator) {
+ MapJoinOperator mop = (MapJoinOperator) op;
+ dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
+ if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
+ maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ }
+ }
+ }
+
+ return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
+ }
+
+
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache,
Operator<?> retainableOpEqualParent,
@@ -1590,7 +1626,9 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
if (equalOp1 instanceof MapJoinOperator) {
MapJoinOperator mop = (MapJoinOperator) equalOp1;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
- maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
+ maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ }
}
if (currentOp1.getChildOperators().size() > 1 ||
currentOp2.getChildOperators().size() > 1) {
@@ -1608,7 +1646,9 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
if (op instanceof MapJoinOperator && !retainableOps.contains(op)) {
MapJoinOperator mop = (MapJoinOperator) op;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
- maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
+ maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ }
+ }
}
}
Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
@@ -1616,7 +1656,9 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
if (op instanceof MapJoinOperator && !discardableOps.contains(op)) {
MapJoinOperator mop = (MapJoinOperator) op;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
- maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
+ maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
+ }
}
}
diff --git a/ql/src/test/queries/clientpositive/sharedwork_mapjoin_datasize_check.q b/ql/src/test/queries/clientpositive/sharedwork_mapjoin_datasize_check.q
new file mode 100644
index 00000000000..c4ec003e70f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sharedwork_mapjoin_datasize_check.q
@@ -0,0 +1,108 @@
+--! qt:dataset:src
+--! qt:dataset:src1
+
+set hive.auto.convert.join=true;
+set hive.llap.mapjoin.memory.oversubscribe.factor=0;
+set hive.auto.convert.join.noconditionaltask.size=500;
+
+-- The InMemoryDataSize of MapJoin is 280. Therefore, SWO should not merge 2 TSs reading src
+-- as the sum of the InMemoryDataSize of 2 unmerged MapJoins exceeds 500.
+-- TSs are identical and FILs are not identical.
+explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+)
+select * from aa join bb on aa.a = bb.a;
+
+
+-- The InMemoryDataSize of MapJoin is 280. Since the limit is 1000, SWO should not merge 4 TSs into a single TS.
+-- TSs are identical and TS.getNumChild() > 1.
+set hive.auto.convert.join.noconditionaltask.size=1000;
+explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000002 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000003 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a;
+
+-- The InMemoryDataSize of MapJoin is 280. Since the limit is 1000, SWO should not merge 4 TSs into a single TS.
+-- TSs, FILs are identical and FIL.getNumChild() > 1.
+set hive.auto.convert.join.noconditionaltask.size=1000;
+explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a;
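For reference, the arithmetic the comments in the test above rely on, read together with the expected plans that follow: each MapJoin builds a hash table estimated at an InMemoryDataSize of 280, so merging two table scans co-locates two unmerged MapJoins in one work and 280 + 280 = 560 > 500 (the first limit), while merging all four would give 4 * 280 = 1120 > 1000 (the second limit). SharedWorkOptimizer therefore keeps the scans of the first test fully separate, and in the later tests stops merging once the accumulated size would cross the limit, which is why a subset of the scans still merges where the sum stays within bounds.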
diff --git a/ql/src/test/results/clientpositive/llap/sharedwork_mapjoin_datasize_check.q.out b/ql/src/test/results/clientpositive/llap/sharedwork_mapjoin_datasize_check.q.out
new file mode 100644
index 00000000000..cc909ea745f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sharedwork_mapjoin_datasize_check.q.out
@@ -0,0 +1,1809 @@
+PREHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+)
+select * from aa join bb on aa.a = bb.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+)
+select * from aa join bb on aa.a = bb.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t5`.`$f0`, `t12`.`$f1`, `t12`.`$f2`, `t5`.`$f0` AS `$f00`, `t5`.`$f1` AS `$f10`, `t5`.`$f2` AS `$f20`
+FROM (SELECT `t0`.`key` AS `$f0`, SUM(CAST(`t0`.`value` AS DOUBLE)) / COUNT(CAST(`t0`.`value` AS DOUBLE)) AS `$f1`, SUM(`t2`.`value`) AS `$f2`
+FROM (SELECT `key`, `value`
+FROM `default`.`src`
+WHERE `value` > 1000001 AND `key` IS NOT NULL) AS `t0`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000001 AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key`
+GROUP BY `t0`.`key`) AS `t5`
+INNER JOIN (SELECT `t7`.`key` AS `$f0`, SUM(CAST(`t7`.`value` AS DOUBLE)) / COUNT(CAST(`t7`.`value` AS DOUBLE)) AS `$f1`, SUM(`t9`.`value`) AS `$f2`
+FROM (SELECT `key`, `value`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t7`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t9` ON `t7`.`key` = `t9`.`key`
+GROUP BY `t7`.`key`) AS `t12` ON `t5`.`$f0` = `t12`.`$f0`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE)
+ Map 3 <- Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_87_container, bigKeyColName:key, smallTablePos:1, keyRatio:0.002
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), UDFToDouble(_col1) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col2), count(_col2), sum(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), UDFToDouble(_col1) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col2), count(_col2), sum(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (((UDFToDouble(value) > 1000000.0D) and key is not null) or ((UDFToDouble(value) > 1000001.0D) and key is not null)) (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src1
+ name: default.src1
+ Truncated Path -> Alias:
+ /src1 [src1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 4 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ input vertices:
+ 1 Reducer 4
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: double), _col5 (type: double), _col0 (type: string), _col1 (type: double), _col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types string:double:double:string:double:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Reducer 4
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000002 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000003 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000002 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000003 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t4`.`$f0`, `t11`.`$f1`, `t11`.`$f2`, `t4`.`$f0` AS `$f00`, `t4`.`$f1` AS `$f10`, `t4`.`$f2` AS `$f20`, `t17`.`$f0` AS `$f01`, `t17`.`$f1` AS `$f11`, `t17`.`$f2` AS `$f21`, `t23`.`$f0` AS `$f02`, `t23`.`$f1` AS `$f12`, `t23`.`$f2` AS `$f22`
+FROM (SELECT `t0`.`key` AS `$f0`, SUM(`t0`.`b`) / COUNT(`t0`.`b`) AS `$f1`, SUM(`t2`.`value`) AS `$f2`
+FROM (SELECT `key`, 2 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000001 AND `key` IS NOT NULL) AS `t0`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000001 AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key`
+GROUP BY `t0`.`key`) AS `t4`
+INNER JOIN (SELECT `t6`.`key` AS `$f0`, SUM(CAST(`t6`.`value` AS DOUBLE)) / COUNT(CAST(`t6`.`value` AS DOUBLE)) AS `$f1`, SUM(`t8`.`value`) AS `$f2`
+FROM (SELECT `key`, `value`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t6`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t8` ON `t6`.`key` = `t8`.`key`
+GROUP BY `t6`.`key`) AS `t11` ON `t4`.`$f0` = `t11`.`$f0`
+INNER JOIN (SELECT `t13`.`key` AS `$f0`, SUM(`t13`.`b`) / COUNT(`t13`.`b`) AS `$f1`, SUM(`t15`.`value`) AS `$f2`
+FROM (SELECT `key`, 3 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000002 AND `key` IS NOT NULL) AS `t13`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000002 AND `key` IS NOT NULL) AS `t15` ON `t13`.`key` = `t15`.`key`
+GROUP BY `t13`.`key`) AS `t17` ON `t4`.`$f0` = `t17`.`$f0`
+INNER JOIN (SELECT `t19`.`key` AS `$f0`, SUM(`t19`.`b`) / COUNT(`t19`.`b`) AS `$f1`, SUM(`t21`.`value`) AS `$f2`
+FROM (SELECT `key`, 4 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000003 AND `key` IS NOT NULL) AS `t19`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000003 AND `key` IS NOT NULL) AS `t21` ON `t19`.`key` = `t21`.`key`
+GROUP BY `t19`.`key`) AS `t23` ON `t4`.`$f0` = `t23`.`$f0`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+ Map 8 <- Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 7 <- Map 5 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: (((UDFToDouble(value) > 1000001.0D) and key is not null) or ((UDFToDouble(value) > 1000002.0D) and key is not null) or ((UDFToDouble(value) > 1000000.0D) and key is not null)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), (2.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000002.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), (3.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 7 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 7
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 6 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 6
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), UDFToDouble(_col1) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col2), count(_col2), sum(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (((UDFToDouble(value) > 1000001.0D) and key is not null) or ((UDFToDouble(value) > 1000000.0D) and key is not null) or ((UDFToDouble(value) > 1000002.0D) and key is not null) or ((UDFToDouble(value) > 1000003.0D) and key is not null)) (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000002.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000003.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src1
+ name: default.src1
+ Truncated Path -> Alias:
+ /src1 [src1]
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((UDFToDouble(value) > 1000003.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000003.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), (4.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+ Reducer 4
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 2 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ input vertices:
+ 0 Reducer 2
+ Position of Big Table: 1
+ Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 222 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 9 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ input vertices:
+ 1 Reducer 9
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: double), _col5 (type: double), _col0 (type: string), _col1 (type: double), _col2 (type: double), _col6 (type: string), _col7 (type: double), _col8 (type: double), _col9 (type: string), _col10 (type: double), _col11 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 412 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 412 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
+ columns.types string:double:double:string:double:double:string:double:double:string:double:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Reducer 6
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ bucketingVersion: -2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Reducer 7
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ bucketingVersion: -2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Reducer 9
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+with
+a as (
+ select src.key a, src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
+),
+b as (
+ select src.key a, 2 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000001
+),
+c as (
+ select src.key a, 3 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000002
+),
+d as (
+ select src.key a, 4 * src.value b, src1.value c
+ from src, src1
+ where src.key = src1.key and src.value > 1000000 and src1.value > 1000003
+),
+aa as (
+ select a, avg(b) as b, sum(c) as c from a group by a
+),
+bb as (
+ select a, avg(b) as b, sum(c) as c from b group by a
+),
+cc as (
+ select a, avg(b) as b, sum(c) as c from c group by a
+),
+dd as (
+ select a, avg(b) as b, sum(c) as c from d group by a
+)
+select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t4`.`$f0`, `t11`.`$f1`, `t11`.`$f2`, `t4`.`$f0` AS `$f00`, `t4`.`$f1` AS `$f10`, `t4`.`$f2` AS `$f20`, `t17`.`$f0` AS `$f01`, `t17`.`$f1` AS `$f11`, `t17`.`$f2` AS `$f21`, `t23`.`$f0` AS `$f02`, `t23`.`$f1` AS `$f12`, `t23`.`$f2` AS `$f22`
+FROM (SELECT `t0`.`key` AS `$f0`, SUM(`t0`.`b`) / COUNT(`t0`.`b`) AS `$f1`, SUM(`t2`.`value`) AS `$f2`
+FROM (SELECT `key`, 2 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t0`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000001 AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key`
+GROUP BY `t0`.`key`) AS `t4`
+INNER JOIN (SELECT `t6`.`key` AS `$f0`, SUM(CAST(`t6`.`value` AS DOUBLE)) / COUNT(CAST(`t6`.`value` AS DOUBLE)) AS `$f1`, SUM(`t8`.`value`) AS `$f2`
+FROM (SELECT `key`, `value`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t6`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t8` ON `t6`.`key` = `t8`.`key`
+GROUP BY `t6`.`key`) AS `t11` ON `t4`.`$f0` = `t11`.`$f0`
+INNER JOIN (SELECT `t13`.`key` AS `$f0`, SUM(`t13`.`b`) / COUNT(`t13`.`b`) AS `$f1`, SUM(`t15`.`value`) AS `$f2`
+FROM (SELECT `key`, 3 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t13`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000002 AND `key` IS NOT NULL) AS `t15` ON `t13`.`key` =
`t15`.`key`
+GROUP BY `t13`.`key`) AS `t17` ON `t4`.`$f0` = `t17`.`$f0`
+INNER JOIN (SELECT `t19`.`key` AS `$f0`, SUM(`t19`.`b`) / COUNT(`t19`.`b`) AS
`$f1`, SUM(`t21`.`value`) AS `$f2`
+FROM (SELECT `key`, 4 * CAST(`value` AS DOUBLE) AS `b`
+FROM `default`.`src`
+WHERE `value` > 1000000 AND `key` IS NOT NULL) AS `t19`
+INNER JOIN (SELECT `key`, `value`
+FROM `default`.`src1`
+WHERE `value` > 1000003 AND `key` IS NOT NULL) AS `t21` ON `t19`.`key` =
`t21`.`key`
+GROUP BY `t19`.`key`) AS `t23` ON `t4`.`$f0` = `t23`.`$f0`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+ Map 8 <- Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 7 <- Map 5 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), (2.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Select Operator
+ expressions: key (type: string), (3.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 7 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 7
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 6 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 6
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: string), UDFToDouble(_col1) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col2), count(_col2), sum(_col1)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (((UDFToDouble(value) > 1000001.0D) and key is not null) or ((UDFToDouble(value) > 1000000.0D) and key is not null) or ((UDFToDouble(value) > 1000002.0D) and key is not null) or ((UDFToDouble(value) > 1000003.0D) and key is not null)) (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000001.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000002.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000003.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src1
+ name: default.src1
+ Truncated Path -> Alias:
+ /src1 [src1]
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(value) > 1000000.0D) and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), (4.0D * UDFToDouble(value)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 5 => 8
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col3 (type: double)
+ auto parallelism: true
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+ Reducer 4
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 2 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ input vertices:
+ 0 Reducer 2
+ Position of Big Table: 1
+ Statistics: Num rows: 1 Data size: 119 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 222 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 9 => 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ input vertices:
+ 1 Reducer 9
+ Position of Big Table: 0
+ Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: double), _col5 (type: double), _col0 (type: string), _col1 (type: double), _col2 (type: double), _col6 (type: string), _col7 (type: double), _col8 (type: double), _col9 (type: string), _col10 (type: double), _col11 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 412 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 412 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
+ columns.types string:double:double:string:double:double:string:double:double:string:double:double
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Reducer 6
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ bucketingVersion: -2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Reducer 7
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ bucketingVersion: -2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: string)
+ auto parallelism: true
+ Reducer 9
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ value expressions: _col1 (type: double), _col2 (type: double)
+ auto parallelism: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
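Note on reading the golden output above: the scan of src is shared by three of the four CTE branches (all hosted in Map 1, each feeding its own Map Join), while the fourth branch keeps a separate scan and Map Join in Map 8. A minimal sketch of the admissibility test this qtest exercises, assuming a simple sum-against-budget model; the class and method names below are illustrative, not Hive's actual API (in Hive the budget role is played by hive.auto.convert.join.noconditionaltask.size):

    import java.util.List;

    // Hypothetical sketch, not Hive's actual API: a vertex merge is admissible
    // only if the in-memory hash tables of all MapJoins that would land in the
    // merged vertex fit within the small-table memory budget.
    final class MapJoinMergeBudget {

        private MapJoinMergeBudget() {
        }

        // inMemoryDataSizes: estimated hash-table size of each MapJoin that the
        // merged vertex would host; maxSize: the broadcast hash-table budget.
        static boolean mergeFitsInMemory(List<Long> inMemoryDataSizes, long maxSize) {
            long total = 0L;
            for (long size : inMemoryDataSizes) {
                total += size;
                if (total > maxSize) {
                    return false; // over budget: keep the vertices separate
                }
            }
            return true;
        }

        public static void main(String[] args) {
            // Mirrors the shape of the plan above: three MapJoin branches fit in
            // one vertex, a fourth would exceed the budget and stays on its own.
            System.out.println(mergeFitsInMemory(List.of(500L, 500L, 400L), 1500L));        // true
            System.out.println(mergeFitsInMemory(List.of(500L, 500L, 400L, 400L), 1500L));  // false
        }
    }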