HIVE-11171: Join reordering algorithm might introduce projects between joins (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea663101 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea663101 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea663101 Branch: refs/heads/branch-1 Commit: ea663101db57426c6a68878729286d5cafe70abb Parents: e2f1caf Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Mon Aug 10 15:57:24 2015 +0300 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Mon Aug 10 15:57:24 2015 +0300 ---------------------------------------------------------------------- .../calcite/rules/HiveJoinCommuteRule.java | 96 ++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 16 +- .../results/clientpositive/auto_join12.q.out | 54 +- .../results/clientpositive/auto_join5.q.out | 8 +- .../constantPropagateForSubQuery.q.out | 38 +- .../clientpositive/correlationoptimizer15.q.out | 120 +- .../clientpositive/correlationoptimizer6.q.out | 818 ++++++------- ql/src/test/results/clientpositive/join12.q.out | 28 +- ql/src/test/results/clientpositive/join5.q.out | 20 +- .../join_merge_multi_expressions.q.out | 46 +- .../results/clientpositive/join_merging.q.out | 117 +- .../results/clientpositive/join_nulls.q.out | 2 +- .../results/clientpositive/limit_pushdown.q.out | 98 +- .../test/results/clientpositive/lineage3.q.out | 29 +- .../clientpositive/louter_join_ppr.q.out | 74 +- .../results/clientpositive/optional_outer.q.out | 36 +- .../clientpositive/outer_join_ppr.q.java1.7.out | 168 ++- .../results/clientpositive/ppd_gby_join.q.out | 104 +- .../test/results/clientpositive/ppd_join.q.out | 106 +- .../test/results/clientpositive/ppd_join2.q.out | 88 +- .../test/results/clientpositive/ppd_join3.q.out | 114 +- .../clientpositive/ppd_outer_join4.q.out | 88 +- .../results/clientpositive/ppd_random.q.out | 80 +- .../clientpositive/rcfile_null_value.q.out | 20 +- .../clientpositive/router_join_ppr.q.out | 170 ++- .../test/results/clientpositive/skewjoin.q.out | 46 +- .../clientpositive/tez/explainuser_1.q.out | 1094 +++++++++--------- .../clientpositive/tez/limit_pushdown.q.out | 78 +- .../test/results/clientpositive/tez/mrr.q.out | 82 +- .../results/clientpositive/tez/skewjoin.q.out | 26 +- .../results/clientpositive/tez/tez_union.q.out | 88 +- 31 files changed, 1902 insertions(+), 2050 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java new file mode 100644 index 0000000..f73affc --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.rules.JoinCommuteRule; +import org.apache.calcite.util.Permutation; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +/** + * Planner rule that permutes the inputs of a Join, if it has a Project on top + * that simply swaps the fields of both inputs. + */ +public class HiveJoinCommuteRule extends RelOptRule { + + public static final HiveJoinCommuteRule INSTANCE = new HiveJoinCommuteRule( + HiveProject.class, HiveJoin.class); + + + public HiveJoinCommuteRule(Class<? extends Project> projClazz, + Class<? extends Join> joinClazz) { + super(operand(projClazz, + operand(joinClazz, any()))); + } + + public void onMatch(final RelOptRuleCall call) { + Project topProject = call.rel(0); + Join join = call.rel(1); + + // 1. We check if it is a permutation project. If it is + // not, or this is the identity, the rule will do nothing + final Permutation topPermutation = topProject.getPermutation(); + if (topPermutation == null) { + return; + } + if (topPermutation.isIdentity()) { + return; + } + + // 2. We swap the join + final RelNode swapped = JoinCommuteRule.swap(join,true); + if (swapped == null) { + return; + } + + // 3. The result should have a project on top, otherwise we + // bail out. + if (swapped instanceof Join) { + return; + } + + // 4. We check if it is a permutation project. If it is + // not, or this is the identity, the rule will do nothing + final Project bottomProject = (Project) swapped; + final Permutation bottomPermutation = bottomProject.getPermutation(); + if (bottomPermutation == null) { + return; + } + if (bottomPermutation.isIdentity()) { + return; + } + + // 5. If the product of the topPermutation and bottomPermutation yields + // the identity, then we can swap the join and remove the project on + // top. + final Permutation product = topPermutation.product(bottomPermutation); + if (!product.isIdentity()) { + return; + } + + // 6. Return the new join as a replacement + final Join swappedJoin = (Join) bottomProject.getInput(0); + call.transformTo(swappedJoin); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 38f2014..347cd6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -142,6 +142,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransp import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; @@ -862,7 +863,16 @@ public class CalcitePlanner extends SemanticAnalyzer { calciteOptimizedPlan = hepPlanner.findBestExp(); - // run rule to fix windowing issue when it is done over + // 4. Run rule to try to remove projects on top of join operators + hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); + hepPgmBldr.addRuleInstance(HiveJoinCommuteRule.INSTANCE); + hepPlanner = new HepPlanner(hepPgmBldr.build()); + hepPlanner.registerMetadataProviders(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + hepPlanner.setRoot(calciteOptimizedPlan); + calciteOptimizedPlan = hepPlanner.findBestExp(); + + // 5. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveWindowingFixRule.INSTANCE); @@ -872,8 +882,9 @@ public class CalcitePlanner extends SemanticAnalyzer { hepPlanner.setRoot(calciteOptimizedPlan); calciteOptimizedPlan = hepPlanner.findBestExp(); + // 6. Run rules to aid in translation from Calcite tree to Hive tree if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - // run rules to aid in translation from Optiq tree -> Hive tree + // 6.1. Merge join into multijoin operators (if possible) hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE); hepPlanner = new HepPlanner(hepPgmBldr.build()); @@ -882,6 +893,7 @@ public class CalcitePlanner extends SemanticAnalyzer { hepPlanner.setRoot(calciteOptimizedPlan); calciteOptimizedPlan = hepPlanner.findBestExp(); + // 6.2. Introduce exchange operators below join/multijoin operators hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN); hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/auto_join12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index e97d7e6..7d8db0a 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -32,7 +32,7 @@ STAGE PLANS: $hdt$_0:$hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -51,7 +51,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -59,8 +59,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -77,8 +77,8 @@ STAGE PLANS: predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -86,33 +86,29 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col1,_col3) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col1,_col3) (type: int) + Group By Operator + aggregations: sum(_col0) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/auto_join5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out index 69b7aab..3209d07 100644 --- a/ql/src/test/results/clientpositive/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -46,11 +46,11 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -81,14 +81,14 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Outer Join0 to 1 + Right Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out index 40d2dd4..b52b475 100644 --- a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out +++ b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out @@ -80,20 +80,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -108,9 +94,23 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 value expressions: _col1 (type: string) auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,8 +203,8 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src [$hdt$_1:a] - /src1 [$hdt$_0:b] + /src [$hdt$_0:a] + /src1 [$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -213,10 +213,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '429' (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string) + expressions: '429' (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/correlationoptimizer15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out index d5f45da..43ba27d 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -23,14 +23,14 @@ JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -85,7 +85,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -109,10 +109,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan alias: yy Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -127,13 +134,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -143,18 +143,14 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -259,20 +255,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: yy - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -289,44 +285,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: yy + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -353,17 +328,30 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Mux Operator + Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator