This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new afe05b96802 HIVE-28490: Stop removing retainable DPP sources (Seonggon
Namgung, reviewed by Denys Kuzmenko)
afe05b96802 is described below
commit afe05b968026dfdda631de1e2b090665f0820ef3
Author: seonggon <[email protected]>
AuthorDate: Tue Nov 19 20:33:05 2024 +0900
HIVE-28490: Stop removing retainable DPP sources (Seonggon Namgung,
reviewed by Denys Kuzmenko)
Closes #5425
---
.../hive/ql/optimizer/SharedWorkOptimizer.java | 109 +-
.../sharedwork_dpp_removal_hive_28490.q | 79 ++
.../llap/sharedwork_dpp_removal_hive_28490.q.out | 1433 ++++++++++++++++++++
3 files changed, 1532 insertions(+), 89 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index d2948cb7057..6ffc21a8a49 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -28,6 +28,7 @@ import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -1195,7 +1196,7 @@ public class SharedWorkOptimizer extends Transform {
Operator<?> op = dppsOp1.get(i);
if (op instanceof ReduceSinkOperator) {
Set<Operator<?>> ascendants =
- findAscendantWorkOperators(pctx, optimizerCache, op);
+ findAscendantOperators(optimizerCache, op);
if (ascendants.contains(tsOp2)) {
// This should not happen, we cannot merge
return false;
@@ -1206,7 +1207,7 @@ public class SharedWorkOptimizer extends Transform {
Operator<?> op = dppsOp2.get(i);
if (op instanceof ReduceSinkOperator) {
Set<Operator<?>> ascendants =
- findAscendantWorkOperators(pctx, optimizerCache, op);
+ findAscendantOperators(optimizerCache, op);
if (ascendants.contains(tsOp1)) {
// This should not happen, we cannot merge
return false;
@@ -1633,8 +1634,7 @@ public class SharedWorkOptimizer extends Transform {
Collection<Operator<?>> c = optimizerCache.tableScanToDPPSource
.get((TableScanOperator) op);
for (Operator<?> dppSource : c) {
- Set<Operator<?>> ascendants =
- findAscendantWorkOperators(pctx, optimizerCache, dppSource);
+ Set<Operator<?>> ascendants = findAscendantOperators(optimizerCache, dppSource);
if (!Collections.disjoint(ascendants, discardedOps)) {
// Remove branch
removeBranch(dppSource, dppBranches, ops, optimizerCache);
@@ -1938,97 +1938,28 @@ public class SharedWorkOptimizer extends Transform {
}
- private static Set<Operator<?>> findParentWorkOperators(ParseContext pctx,
- SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
- return findParentWorkOperators(pctx, optimizerCache, start, ImmutableSet.of());
- }
-
- private static Set<Operator<?>> findParentWorkOperators(ParseContext pctx,
- SharedWorkOptimizerCache optimizerCache, Operator<?> start,
- Set<Operator<?>> excludeOps) {
- // Find operators in work
- Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
- // Gather input works operators
- Set<Operator<?>> set = new HashSet<Operator<?>>();
- for (Operator<?> op : workOps) {
- if (op.getParentOperators() != null) {
- for (Operator<?> parent : op.getParentOperators()) {
- if (parent instanceof ReduceSinkOperator && !excludeOps.contains(parent)) {
- set.addAll(findWorkOperators(optimizerCache, parent));
- }
- }
- }
- if (op instanceof TableScanOperator) {
- // Check for DPP and semijoin DPP
- for (Operator<?> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
- if (!excludeOps.contains(parent)) {
- set.addAll(findWorkOperators(optimizerCache, parent));
- }
- }
- }
- }
- return set;
- }
-
- private static Set<Operator<?>> findAscendantWorkOperators(ParseContext pctx,
- SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
- // Find operators in work
- Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
- // Gather input works operators
- Set<Operator<?>> result = new HashSet<Operator<?>>();
- Set<Operator<?>> set;
- while (!workOps.isEmpty()) {
- set = new HashSet<Operator<?>>();
- for (Operator<?> op : workOps) {
+ private static Set<Operator<?>> findAscendantOperators(SharedWorkOptimizerCache optimizerCache,
+ Operator<?> start) {
+ Set<Operator<?>> visited = new HashSet<>();
+ visited.add(start);
+
+ // Gather input operators
+ Queue<Operator<?>> remaining = new LinkedList<>(start.getParentOperators());
+ while (!remaining.isEmpty()) {
+ Operator<?> op = remaining.poll();
+ if (!visited.contains(op)) {
+ visited.add(op);
if (op.getParentOperators() != null) {
- for (Operator<?> parent : op.getParentOperators()) {
- if (parent instanceof ReduceSinkOperator) {
- set.addAll(findWorkOperators(optimizerCache, parent));
- }
- }
- } else if (op instanceof TableScanOperator) {
+ remaining.addAll(op.getParentOperators());
+ }
+ if (op instanceof TableScanOperator) {
// Check for DPP and semijoin DPP
- for (Operator<?> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
- set.addAll(findWorkOperators(optimizerCache, parent));
- }
+ remaining.addAll(optimizerCache.tableScanToDPPSource.get((TableScanOperator) op));
}
}
- workOps = set;
- result.addAll(set);
}
- return result;
- }
- private static Set<Operator<?>> findChildWorkOperators(ParseContext pctx,
- SharedWorkOptimizerCache optimizerCache, Operator<?> start, boolean traverseEventOperators) {
- // Find operators in work
- Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
- // Gather output works operators
- Set<Operator<?>> set = new HashSet<Operator<?>>();
- for (Operator<?> op : workOps) {
- if (op instanceof ReduceSinkOperator) {
- if (op.getChildOperators() != null) {
- // All children of RS are descendants
- for (Operator<?> child : op.getChildOperators()) {
- set.addAll(findWorkOperators(optimizerCache, child));
- }
- }
- // Semijoin DPP work is considered a child because work needs
- // to finish for it to execute
- SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
- if (sjbi != null) {
- set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
- }
- } else if(op.getConf() instanceof DynamicPruningEventDesc) {
- // DPP work is considered a child because work needs
- // to finish for it to execute
- if (traverseEventOperators) {
- set.addAll(findWorkOperators(
- optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan()));
- }
- }
- }
- return set;
+ return visited;
}
private static Set<Operator<?>> findDescendantWorkOperators(ParseContext pctx,
diff --git a/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q b/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q
new file mode 100644
index 00000000000..1988b8b220d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q
@@ -0,0 +1,79 @@
+create table x2_date_dim (d_date_sk bigint, d_week_seq string, d_date string);
+create table x2_item (i_item_sk bigint, i_item_id string);
+create table x2_store_returns
+(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int,
sr_some_field string, sr_other_field string);
+create table x2_catalog_returns
+(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int,
cr_some_field string, cr_other_field string);
+
+alter table x2_date_dim update statistics set('numRows'='35',
'rawDataSize'='81449');
+alter table x2_item update statistics set('numRows'='12345',
'rawDataSize'='123456');
+alter table x2_store_returns update statistics set('numRows'='123456',
'rawDataSize'='1234567');
+alter table x2_catalog_returns update statistics set('numRows'='123456',
'rawDataSize'='1234567');
+
+set hive.auto.convert.join=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=30; -- This should be less than numRows of x2_date_dim
+set hive.tez.dynamic.semijoin.reduction.threshold=0.0; -- In order not to remove any SemiJoin branch
+set hive.tez.dynamic.semijoin.reduction.for.mapjoin=true; -- In order not to remove any SemiJoin branch
+
+-- To check whether the original query plan contains the following pattern:
+-- date_dim ─┐
+-- date_dim ─┴ MapJoin ─(DPP)─ date_dim ─ (... catalog_returns)
+-- date_dim ─┐
+-- date_dim ─┴ MapJoin ─(DPP)─ date_dim ─ (... store_returns)
+
+set hive.optimize.shared.work=false;
+explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id;
+
+set hive.optimize.shared.work=true;
+explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id;
diff --git a/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out b/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out
new file mode 100644
index 00000000000..000ccd2f8da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out
@@ -0,0 +1,1433 @@
+PREHOOK: query: create table x2_date_dim (d_date_sk bigint, d_week_seq string,
d_date string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x2_date_dim
+POSTHOOK: query: create table x2_date_dim (d_date_sk bigint, d_week_seq
string, d_date string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x2_date_dim
+PREHOOK: query: create table x2_item (i_item_sk bigint, i_item_id string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x2_item
+POSTHOOK: query: create table x2_item (i_item_sk bigint, i_item_id string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x2_item
+PREHOOK: query: create table x2_store_returns
+(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int,
sr_some_field string, sr_other_field string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x2_store_returns
+POSTHOOK: query: create table x2_store_returns
+(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int,
sr_some_field string, sr_other_field string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x2_store_returns
+PREHOOK: query: create table x2_catalog_returns
+(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int,
cr_some_field string, cr_other_field string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x2_catalog_returns
+POSTHOOK: query: create table x2_catalog_returns
+(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int,
cr_some_field string, cr_other_field string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x2_catalog_returns
+PREHOOK: query: alter table x2_date_dim update statistics set('numRows'='35',
'rawDataSize'='81449')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x2_date_dim
+PREHOOK: Output: default@x2_date_dim
+POSTHOOK: query: alter table x2_date_dim update statistics set('numRows'='35',
'rawDataSize'='81449')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x2_date_dim
+POSTHOOK: Output: default@x2_date_dim
+PREHOOK: query: alter table x2_item update statistics set('numRows'='12345',
'rawDataSize'='123456')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x2_item
+PREHOOK: Output: default@x2_item
+POSTHOOK: query: alter table x2_item update statistics set('numRows'='12345',
'rawDataSize'='123456')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x2_item
+POSTHOOK: Output: default@x2_item
+PREHOOK: query: alter table x2_store_returns update statistics
set('numRows'='123456', 'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x2_store_returns
+PREHOOK: Output: default@x2_store_returns
+POSTHOOK: query: alter table x2_store_returns update statistics
set('numRows'='123456', 'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x2_store_returns
+POSTHOOK: Output: default@x2_store_returns
+PREHOOK: query: alter table x2_catalog_returns update statistics
set('numRows'='123456', 'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x2_catalog_returns
+PREHOOK: Output: default@x2_catalog_returns
+POSTHOOK: query: alter table x2_catalog_returns update statistics
set('numRows'='123456', 'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x2_catalog_returns
+POSTHOOK: Output: default@x2_catalog_returns
+PREHOOK: query: explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x2_catalog_returns
+PREHOOK: Input: default@x2_date_dim
+PREHOOK: Input: default@x2_item
+PREHOOK: Input: default@x2_store_returns
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x2_catalog_returns
+POSTHOOK: Input: default@x2_date_dim
+POSTHOOK: Input: default@x2_item
+POSTHOOK: Input: default@x2_store_returns
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 7
(BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
+ Map 11 <- Map 14 (BROADCAST_EDGE), Map 16 (BROADCAST_EDGE), Map 18
(BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE)
+ Map 14 <- Reducer 19 (BROADCAST_EDGE)
+ Map 18 <- Map 20 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE)
+ Map 3 <- Reducer 8 (BROADCAST_EDGE)
+ Map 5 <- Reducer 13 (BROADCAST_EDGE)
+ Map 7 <- Map 9 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE)
+ Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
+ Reducer 12 <- Map 11 (SIMPLE_EDGE)
+ Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE)
+ Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
+ Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
+ Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE)
+ Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x2_catalog_returns
+ filterExpr: (cr_item_sk is not null and cr_returned_date_sk
is not null and cr_returned_date_sk BETWEEN
DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN
DynamicValue(RS_13_x2_item_i_item_sk_min) AND
DynamicValue(RS_13_x2_item_i_item_sk_max) and
in_bloom_filter(cr_returned_date_sk,
DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_ [...]
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_215_container,
bigKeyColName:cr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709
+ Statistics: Num rows: 123456 Data size: 2345700 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_item_sk is not null and cr_returned_date_sk
is not null and cr_returned_date_sk BETWEEN
DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN
DynamicValue(RS_13_x2_item_i_item_sk_min) AND
DynamicValue(RS_13_x2_item_i_item_sk_max) and
in_bloom_filter(cr_returned_date_sk,
DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item [...]
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: bigint),
cr_item_sk (type: bigint), cr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 122223 Data size: 2322276 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col2, _col4, _col6
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col6 (type:
string), _col4 (type: string)
+ outputColumnNames: _col2, _col4, _col6
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col6 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 147889 Data size: 2809953
Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: x2_store_returns
+ filterExpr: (sr_item_sk is not null and sr_returned_date_sk
is not null and sr_returned_date_sk BETWEEN
DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN
DynamicValue(RS_51_x2_item_i_item_sk_min) AND
DynamicValue(RS_51_x2_item_i_item_sk_max) and
in_bloom_filter(sr_returned_date_sk,
DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_ [...]
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_218_container,
bigKeyColName:sr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709
+ Statistics: Num rows: 123456 Data size: 2345700 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_returned_date_sk
is not null and sr_returned_date_sk BETWEEN
DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN
DynamicValue(RS_51_x2_item_i_item_sk_min) AND
DynamicValue(RS_51_x2_item_i_item_sk_max) and
in_bloom_filter(sr_returned_date_sk,
DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item [...]
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: bigint),
sr_item_sk (type: bigint), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 14
+ Statistics: Num rows: 122223 Data size: 2322276 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col2, _col4, _col6
+ input vertices:
+ 1 Map 16
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col6 (type:
string), _col4 (type: string)
+ outputColumnNames: _col2, _col4, _col6
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col6 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 1 Map 18
+ Statistics: Num rows: 147889 Data size: 2809953
Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 14
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND
DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 35 Data size: 6720 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND
DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: bigint), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 16
+ Map Operator Tree:
+ TableScan
+ alias: x2_item
+ filterExpr: (i_item_sk is not null and i_item_id is not
null) (type: boolean)
+ Statistics: Num rows: 12345 Data size: 2251968 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_item_id is not
null) (type: boolean)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 18
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_63_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_63_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_63_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_220_container,
bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:1.0285714285714285
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_63_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_63_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_63_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 20
+ Statistics: Num rows: 36 Data size: 13358 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string),
_col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 20
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1
(type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_31_x2_date_dim_d_date_min) AND
DynamicValue(RS_31_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_31_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 35 Data size: 6720 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_31_x2_date_dim_d_date_min) AND
DynamicValue(RS_31_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_31_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: bigint), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: x2_item
+ filterExpr: (i_item_sk is not null and i_item_id is not null
and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND
DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id,
DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 12345 Data size: 2251968 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_item_id is not
null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND
DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id,
DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_217_container,
bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:1.0285714285714285
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 9
+ Statistics: Num rows: 36 Data size: 13358 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string),
_col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1
(type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 12
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0,
expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 428 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 428 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 13
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 15
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 17
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 19
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 12
+ Statistics: Num rows: 81338 Data size: 1545462 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: bigint),
_col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 81338 Data size: 1545462 Basic
stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 81338 Data size: 1545462 Basic
stats: COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 21
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x2_catalog_returns
+PREHOOK: Input: default@x2_date_dim
+PREHOOK: Input: default@x2_item
+PREHOOK: Input: default@x2_store_returns
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+with sr_items as (
+ select i_item_id item_id, sum(sr_return_quantity) sr_item_qty
+ from x2_store_returns, x2_item, x2_date_dim
+ where
+ sr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ sr_returned_date_sk = d_date_sk group by i_item_id
+),
+cr_items as (
+ select i_item_id item_id, sum(cr_return_quantity) cr_item_qty
+ from x2_catalog_returns, x2_item, x2_date_dim
+ where
+ cr_item_sk = i_item_sk and
+ d_date in (
+ select d_date from x2_date_dim
+ where d_week_seq in (
+ select d_week_seq from x2_date_dim where d_date in
('1998-01-02','1998-10-15','1998-11-10'))) and
+ cr_returned_date_sk = d_date_sk group by i_item_id
+)
+select sr_items.item_id, sr_item_qty, cr_item_qty
+from sr_items, cr_items
+where sr_items.item_id=cr_items.item_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x2_catalog_returns
+POSTHOOK: Input: default@x2_date_dim
+POSTHOOK: Input: default@x2_item
+POSTHOOK: Input: default@x2_store_returns
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 12 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5
(BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+ Map 12 <- Reducer 6 (BROADCAST_EDGE)
+ Map 3 <- Reducer 11 (BROADCAST_EDGE)
+ Map 5 <- Map 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+ Map 9 <- Map 12 (BROADCAST_EDGE), Map 15 (BROADCAST_EDGE), Map 5
(BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE)
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE)
+ Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE)
+ Reducer 14 <- Map 12 (CUSTOM_SIMPLE_EDGE)
+ Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE)
+ Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x2_catalog_returns
+ filterExpr: (cr_item_sk is not null and cr_returned_date_sk
is not null and cr_returned_date_sk BETWEEN
DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN
DynamicValue(RS_13_x2_item_i_item_sk_min) AND
DynamicValue(RS_13_x2_item_i_item_sk_max) and
in_bloom_filter(cr_returned_date_sk,
DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_ [...]
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_215_container,
bigKeyColName:cr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709
+ Statistics: Num rows: 123456 Data size: 2345700 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cr_item_sk is not null and cr_returned_date_sk
is not null and cr_returned_date_sk BETWEEN
DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN
DynamicValue(RS_13_x2_item_i_item_sk_min) AND
DynamicValue(RS_13_x2_item_i_item_sk_max) and
in_bloom_filter(cr_returned_date_sk,
DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item [...]
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cr_returned_date_sk (type: bigint),
cr_item_sk (type: bigint), cr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 122223 Data size: 2322276 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col2, _col4, _col6
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col6 (type:
string), _col4 (type: string)
+ outputColumnNames: _col2, _col4, _col6
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col6 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 147889 Data size: 2809953
Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND
DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 35 Data size: 6720 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_date_sk is not null and d_date is not null
and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND
DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date,
DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: bigint), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 33 Data size: 6336 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: x2_item
+ filterExpr: (i_item_sk is not null and i_item_id is not
null) (type: boolean)
+ Statistics: Num rows: 12345 Data size: 2251968 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_item_id is not
null) (type: boolean)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: x2_item
+ filterExpr: (i_item_sk is not null and i_item_id is not null
and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND
DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id,
DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 12345 Data size: 2251968 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (i_item_sk is not null and i_item_id is not
null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND
DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id,
DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11111 Data size: 2026862 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1
(type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_week_seq is not null and d_date is not null
and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND
DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq,
DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string), d_date (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 33 Data size: 12144 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 1 Map 7
+ Statistics: Num rows: 36 Data size: 13358 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 36 Data size: 13358 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string),
_col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: x2_date_dim
+ filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 35 Data size: 12880 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_date) IN ('1998-01-02', '1998-10-15',
'1998-11-10') and d_week_seq is not null) (type: boolean)
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_week_seq (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 14 Data size: 5152 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1
(type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: x2_store_returns
+ filterExpr: (sr_item_sk is not null and sr_returned_date_sk
is not null and sr_returned_date_sk BETWEEN
DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN
DynamicValue(RS_51_x2_item_i_item_sk_min) AND
DynamicValue(RS_51_x2_item_i_item_sk_max) and
in_bloom_filter(sr_returned_date_sk,
DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_ [...]
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_218_container,
bigKeyColName:sr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709
+ Statistics: Num rows: 123456 Data size: 2345700 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (sr_item_sk is not null and sr_returned_date_sk
is not null and sr_returned_date_sk BETWEEN
DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND
DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN
DynamicValue(RS_51_x2_item_i_item_sk_min) AND
DynamicValue(RS_51_x2_item_i_item_sk_max) and
in_bloom_filter(sr_returned_date_sk,
DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and
in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item [...]
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sr_returned_date_sk (type: bigint),
sr_item_sk (type: bigint), sr_return_quantity (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 111112 Data size: 2111160 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1, _col2, _col4
+ input vertices:
+ 1 Map 12
+ Statistics: Num rows: 122223 Data size: 2322276 Basic
stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col2, _col4, _col6
+ input vertices:
+ 1 Map 15
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col6 (type:
string), _col4 (type: string)
+ outputColumnNames: _col2, _col4, _col6
+ Statistics: Num rows: 134445 Data size: 2554503
Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col6 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 147889 Data size: 2809953
Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ keys: _col4 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type:
string)
+ Statistics: Num rows: 147889 Data size:
2809953 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0,
expectedEntries=1000000)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 428 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 428 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 11
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 13
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 14
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 16
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 73944 Data size: 1404966 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Reducer 10
+ Statistics: Num rows: 81338 Data size: 1545462 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col3 (type: bigint),
_col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 81338 Data size: 1545462 Basic
stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 81338 Data size: 1545462 Basic
stats: COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: binary)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, 1, expectedEntries=1000000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+