HIVE-21007: Semi join + Union can lead to wrong plans (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9493dcfd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9493dcfd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9493dcfd Branch: refs/heads/master Commit: 9493dcfd4737c0fc57f5786646642d2ec2af9518 Parents: b42fdc2 Author: Vineet Garg <vg...@apache.org> Authored: Mon Dec 10 09:23:22 2018 -0800 Committer: Vineet Garg <vg...@apache.org> Committed: Mon Dec 10 09:23:22 2018 -0800 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/TezCompiler.java | 56 ++++ .../llap/dynamic_semijoin_reduction_3.q.out | 73 +---- .../clientpositive/perf/tez/cbo_query54.q.out | 2 +- .../perf/tez/constraints/query54.q.out | 251 ++++++++--------- .../perf/tez/constraints/query8.q.out | 138 +++++----- .../clientpositive/perf/tez/query54.q.out | 271 +++++++++---------- .../clientpositive/perf/tez/query8.q.out | 138 +++++----- 7 files changed, 458 insertions(+), 471 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 02cebdc..4b10e89 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -481,6 +481,12 @@ public class TezCompiler extends TaskCompiler { markSemiJoinForDPP(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); + // Remove any semi join edges from Union Op + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + removeSemiJoinEdgesForUnion(procCtx); + 
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, + "Remove any semi join edge between Union and RS"); + // Remove any parallel edge between semijoin and mapjoin. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinsParallelToMapJoin(procCtx); @@ -1314,6 +1320,56 @@ public class TezCompiler extends TaskCompiler { } /* + * Given an operator this method removes all semi join edges downstream (children) until it hits RS + */ + private void removeSemiJoinEdges(Operator<?> op, OptimizeTezProcContext procCtx, + Map<ReduceSinkOperator, TableScanOperator> sjToRemove) throws SemanticException { + if(op instanceof ReduceSinkOperator && op.getNumChild() == 0) { + Map<ReduceSinkOperator, SemiJoinBranchInfo> sjMap = procCtx.parseContext.getRsToSemiJoinBranchInfo(); + if(sjMap.get(op) != null) { + sjToRemove.put((ReduceSinkOperator)op, sjMap.get(op).getTsOp()); + } + } + + for(Operator<?> child:op.getChildOperators()) { + removeSemiJoinEdges(child, procCtx, sjToRemove); + } + } + + private void removeSemiJoinEdgesForUnion(OptimizeTezProcContext procCtx) throws SemanticException{ + // Get all the TS ops. + List<Operator<?>> topOps = new ArrayList<>(); + topOps.addAll(procCtx.parseContext.getTopOps().values()); + Set<Operator<?>> unionOps = new HashSet<>(); + + Map<ReduceSinkOperator, TableScanOperator> sjToRemove = new HashMap<>(); + for (Operator<?> parent : topOps) { + Deque<Operator<?>> deque = new LinkedList<>(); + deque.add(parent); + while (!deque.isEmpty()) { + Operator<?> op = deque.pollLast(); + if (op instanceof UnionOperator && !unionOps.contains(op)) { + unionOps.add(op); + removeSemiJoinEdges(op, procCtx, sjToRemove); + } + deque.addAll(op.getChildOperators()); + } + } + // remove sj + if (sjToRemove.size() > 0) { + for (Map.Entry<ReduceSinkOperator, TableScanOperator> entry : sjToRemove.entrySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin optimization with Union operator. 
Removing semijoin " + + OperatorUtils.getOpNamePretty(entry.getKey()) + " - " + + OperatorUtils.getOpNamePretty(sjToRemove.get(entry.getKey()))); + } + GenTezUtils.removeBranch(entry.getKey()); + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, entry.getKey(), entry.getValue()); + } + } + } + + /* * The algorithm looks at all the mapjoins in the operator pipeline until * it hits RS Op and for each mapjoin examines if it has paralllel semijoin * edge or dynamic partition pruning. http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index cb3740d..ecf6323 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -596,15 +596,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 10 <- Union 2 (CONTAINS) - Map 11 <- Reducer 9 (BROADCAST_EDGE) - Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Map 9 <- Union 2 (CONTAINS) + Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -626,24 +624,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic 
stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map 10 Map Operator Tree: TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 9 + Map Operator Tree: + TableScan alias: nonacidorctbl filterExpr: (b > 0) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -660,37 +658,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: 
vectorized, llap LLAP IO: all inputs - Map 11 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -878,18 +847,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 2 Vertex: Union 2 http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index 
fd5f6d7..50fa078 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with my_customers as ( http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 7a0750e..8d10899 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -133,9 +133,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 18 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) -Map 25 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 18 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 24 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) @@ -144,16 +143,15 @@ Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 11 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) -Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) +Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 28 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) -Reducer 4 <- Map 30 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 32 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 4 <- Map 29 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) @@ -164,25 +162,25 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_358] - Limit [LIM_357] (rows=1 width=16) + File Output 
Operator [FS_353] + Limit [LIM_352] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_356] (rows=1 width=16) + Select Operator [SEL_351] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] - Select Operator [SEL_354] (rows=1 width=16) + SHUFFLE [RS_350] + Select Operator [SEL_349] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_353] (rows=1 width=12) + Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + SHUFFLE [RS_347] PartitionCols:_col0 - Group By Operator [GBY_351] (rows=1 width=12) + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_350] (rows=1 width=116) + Select Operator [SEL_345] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_349] (rows=1 width=116) + Group By Operator [GBY_344] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] @@ -202,42 +200,42 @@ Stage-0 Merge Join Operator [MERGEJOIN_277] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_339] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_335] + Group By Operator [GBY_334] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_301] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) + Group By Operator [GBY_298] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) + Select Operator [SEL_295] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_312] (rows=50 width=12) + Filter Operator [FIL_293] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_26] 
(rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=8) - Filter Operator [FIL_346] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_343] + Select Operator [SEL_342] (rows=1 width=8) + Filter Operator [FIL_341] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_345] (rows=1 width=8) + Group By Operator [GBY_340] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_343] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_339] + Group By Operator [GBY_338] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_342] (rows=25 width=4) - Group By Operator [GBY_341] (rows=25 width=4) + Select Operator [SEL_337] (rows=25 width=4) + Group By Operator [GBY_336] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_302] PartitionCols:_col0 - Group By Operator [GBY_318] (rows=25 width=4) + Group By Operator [GBY_299] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_315] (rows=50 width=12) + Select Operator [SEL_296] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_312] + Please refer to the previous Filter Operator [FIL_293] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Select Operator [SEL_107] (rows=224732600 width=119) @@ -245,41 +243,41 @@ Stage-0 Merge Join Operator [MERGEJOIN_278] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col7","_col13"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_324] - Group By Operator [GBY_322] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_305] + Group By Operator [GBY_303] (rows=25 width=4) 
Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_300] PartitionCols:_col0 - Group By Operator [GBY_316] (rows=25 width=4) + Group By Operator [GBY_297] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_313] (rows=50 width=12) + Select Operator [SEL_294] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_312] + Please refer to the previous Filter Operator [FIL_293] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_104] Merge Join Operator [MERGEJOIN_276] (rows=8989304 width=8) Conds:RS_101._col5=RS_102._col0(Inner),Output:["_col2","_col4","_col7"] - <-Reducer 32 [SIMPLE_EDGE] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_102] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_275] (rows=39720279 width=4) - Conds:RS_335._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col0"] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_335] + Conds:RS_330._col1, _col2=RS_333._col0, _col1(Inner),Output:["_col0"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] PartitionCols:_col1, _col2 - Select Operator [SEL_334] (rows=40000000 width=188) + Select Operator [SEL_329] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_333] (rows=40000000 width=188) + Filter Operator [FIL_328] (rows=40000000 width=188) predicate:(ca_county is not null and ca_state is not null) TableScan [TS_74] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] PartitionCols:_col0, _col1 - Select Operator [SEL_337] (rows=1704 width=184) + Select Operator [SEL_332] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_336] (rows=1704 width=184) + Filter Operator [FIL_331] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) 
TableScan [TS_77] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] @@ -287,11 +285,11 @@ Stage-0 SHUFFLE [RS_101] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_274] (rows=8989304 width=12) - Conds:RS_98._col0=RS_332._col0(Inner),Output:["_col2","_col4","_col5","_col7"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + Conds:RS_98._col0=RS_327._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_331] (rows=73049 width=8) + Select Operator [SEL_326] (rows=73049 width=8) Output:["_col0","_col1"] TableScan [TS_72] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] @@ -299,11 +297,37 @@ Stage-0 SHUFFLE [RS_98] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_273] (rows=8989304 width=8) - Conds:RS_95._col1=RS_303._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + Conds:RS_95._col1=RS_325._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] + Select Operator [SEL_291] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_290] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_23] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + Select Operator [SEL_310] (rows=1 width=8) + Filter Operator [FIL_309] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_308] (rows=1 width=8) + 
Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_307] + Group By Operator [GBY_306] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_304] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_303] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] + SHUFFLE [RS_325] PartitionCols:_col0 - Group By Operator [GBY_302] (rows=55046 width=8) + Group By Operator [GBY_324] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -311,13 +335,13 @@ Stage-0 Group By Operator [GBY_68] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col5, _col6 Merge Join Operator [MERGEJOIN_272] (rows=110092 width=8) - Conds:RS_64._col1=RS_301._col0(Inner),Output:["_col5","_col6"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + Conds:RS_64._col1=RS_323._col0(Inner),Output:["_col5","_col6"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0 - Select Operator [SEL_300] (rows=80000000 width=8) + Select Operator [SEL_322] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_299] (rows=80000000 width=8) + Filter Operator [FIL_321] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_55] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] @@ -325,13 +349,13 @@ Stage-0 SHUFFLE [RS_64] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_271] (rows=110092 width=0) - Conds:RS_61._col2=RS_298._col0(Inner),Output:["_col1"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + Conds:RS_61._col2=RS_320._col0(Inner),Output:["_col1"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_297] (rows=453 width=4) + Select Operator [SEL_319] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_296] (rows=453 
width=186) + Filter Operator [FIL_318] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) TableScan [TS_52] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] @@ -339,84 +363,47 @@ Stage-0 SHUFFLE [RS_61] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_270] (rows=11665117 width=7) - Conds:Union 19._col0=RS_292._col0(Inner),Output:["_col1","_col2"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] + Conds:Union 19._col0=RS_314._col0(Inner),Output:["_col1","_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] PartitionCols:_col0 - Select Operator [SEL_291] (rows=50 width=4) + Select Operator [SEL_313] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=50 width=12) + Filter Operator [FIL_312] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_49] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Union 19 [SIMPLE_EDGE] <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_364] + Reduce Output Operator [RS_359] PartitionCols:_col0 - Select Operator [SEL_363] (rows=285117831 width=11) + Select Operator [SEL_358] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_362] (rows=285117831 width=11) + Filter Operator [FIL_357] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_280] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_360] - Group By Operator [GBY_359] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + 
BROADCAST [RS_355] + Group By Operator [GBY_354] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=50 width=4) + Select Operator [SEL_315] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_291] - <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_367] + Please refer to the previous Select Operator [SEL_313] + <-Map 24 [CONTAINS] vectorized + Reduce Output Operator [RS_362] PartitionCols:_col0 - Select Operator [SEL_366] (rows=143930993 width=11) + Select Operator [SEL_361] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_365] (rows=143930993 width=11) + Filter Operator [FIL_360] (rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_285] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Please refer to the previous Group By Operator [GBY_359] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_311] - Select Operator 
[SEL_310] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=525327388 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_96_customer_c_customer_sk_min) AND DynamicValue(RS_96_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_96_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=55046 width=4) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_302] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_330] - Select Operator [SEL_329] (rows=1 width=8) - Filter Operator [FIL_328] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_327] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_326] - Group By Operator [GBY_325] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_323] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_322] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_356] + Please refer to the previous Group By Operator [GBY_354] 
http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index d97f9df..f008099 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_146] - Limit [LIM_145] (rows=1 width=200) + File Output Operator [FS_149] + Limit [LIM_148] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_144] (rows=1 width=200) + Select Operator [SEL_147] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By 
Operator [GBY_142] (rows=1 width=200) + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] + Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_144] PartitionCols:_col2 - Select Operator [SEL_132] (rows=1704 width=276) + Select Operator [SEL_143] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_131] (rows=1704 width=181) + Filter Operator [FIL_142] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_129] (rows=1 width=184) + Select Operator [SEL_140] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_128] (rows=1 width=192) + Filter Operator [FIL_139] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_127] (rows=3098 width=192) + Group By Operator [GBY_138] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 9 [SIMPLE_EDGE] + <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_167] + Reduce Output Operator [RS_170] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=3098 width=192) + Group By Operator [GBY_169] (rows=3098 
width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_165] (rows=1126 width=192) + Group By Operator [GBY_168] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_167] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=1126 width=192) + Group By Operator [GBY_166] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_162] (rows=2253 width=97) + Select Operator [SEL_165] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=2253 width=97) + Filter Operator [FIL_164] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_160] (rows=6761 width=97) + Group By Operator [GBY_163] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] + Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_155] (rows=40000000 width=93) + Select Operator [SEL_158] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=40000000 width=93) + Filter Operator [FIL_157] (rows=40000000 width=93) predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_158] (rows=26666667 width=4) + Select Operator [SEL_161] 
(rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_157] (rows=26666667 width=89) + Filter Operator [FIL_160] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_153] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_156] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=3098 width=192) + Group By Operator [GBY_155] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_151] (rows=5071 width=192) + Group By Operator [GBY_154] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=70994 width=192) + Group By Operator [GBY_152] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_148] (rows=20000000 width=89) + Select Operator [SEL_151] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_147] (rows=20000000 width=89) + Filter Operator [FIL_150] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', 
'18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', 
'58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] + Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_127] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_137] 
(rows=525329897 width=114) + Select Operator [SEL_136] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_136] (rows=525329897 width=114) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_135] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_90] (rows=1 width=8) + Select Operator [SEL_130] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - PartitionCols:_col0 - Select Operator [SEL_140] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_139] (rows=130 
width=12) - predicate:((d_qoy = 1) and (d_year = 2002)) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Please refer to the previous Select Operator [SEL_128]