[6/6] hive git commit: HIVE-11171: Join reordering algorithm might introduce projects between joins (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Mon, 10 Aug 2015 05:59:16 -0700

HIVE-11171: Join reordering algorithm might introduce projects between joins 
(Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea663101
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea663101
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea663101

Branch: refs/heads/branch-1
Commit: ea663101db57426c6a68878729286d5cafe70abb
Parents: e2f1caf
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Mon Aug 10 15:57:24 2015 +0300
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Mon Aug 10 15:57:24 2015 +0300

----------------------------------------------------------------------
 .../calcite/rules/HiveJoinCommuteRule.java      |   96 ++
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   16 +-
 .../results/clientpositive/auto_join12.q.out    |   54 +-
 .../results/clientpositive/auto_join5.q.out     |    8 +-
 .../constantPropagateForSubQuery.q.out          |   38 +-
 .../clientpositive/correlationoptimizer15.q.out |  120 +-
 .../clientpositive/correlationoptimizer6.q.out  |  818 ++++++-------
 ql/src/test/results/clientpositive/join12.q.out |   28 +-
 ql/src/test/results/clientpositive/join5.q.out  |   20 +-
 .../join_merge_multi_expressions.q.out          |   46 +-
 .../results/clientpositive/join_merging.q.out   |  117 +-
 .../results/clientpositive/join_nulls.q.out     |    2 +-
 .../results/clientpositive/limit_pushdown.q.out |   98 +-
 .../test/results/clientpositive/lineage3.q.out  |   29 +-
 .../clientpositive/louter_join_ppr.q.out        |   74 +-
 .../results/clientpositive/optional_outer.q.out |   36 +-
 .../clientpositive/outer_join_ppr.q.java1.7.out |  168 ++-
 .../results/clientpositive/ppd_gby_join.q.out   |  104 +-
 .../test/results/clientpositive/ppd_join.q.out  |  106 +-
 .../test/results/clientpositive/ppd_join2.q.out |   88 +-
 .../test/results/clientpositive/ppd_join3.q.out |  114 +-
 .../clientpositive/ppd_outer_join4.q.out        |   88 +-
 .../results/clientpositive/ppd_random.q.out     |   80 +-
 .../clientpositive/rcfile_null_value.q.out      |   20 +-
 .../clientpositive/router_join_ppr.q.out        |  170 ++-
 .../test/results/clientpositive/skewjoin.q.out  |   46 +-
 .../clientpositive/tez/explainuser_1.q.out      | 1094 +++++++++---------
 .../clientpositive/tez/limit_pushdown.q.out     |   78 +-
 .../test/results/clientpositive/tez/mrr.q.out   |   82 +-
 .../results/clientpositive/tez/skewjoin.q.out   |   26 +-
 .../results/clientpositive/tez/tez_union.q.out  |   88 +-
 31 files changed, 1902 insertions(+), 2050 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
new file mode 100644
index 0000000..f73affc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.rules.JoinCommuteRule;
+import org.apache.calcite.util.Permutation;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+/**
+ * Planner rule that permutes the inputs of a Join, if it has a Project on top
+ * that simply swaps the fields of both inputs.
+ */
+public class HiveJoinCommuteRule extends RelOptRule {
+
+  public static final HiveJoinCommuteRule INSTANCE = new HiveJoinCommuteRule(
+          HiveProject.class, HiveJoin.class);
+
+
+  public HiveJoinCommuteRule(Class<? extends Project> projClazz,
+      Class<? extends Join> joinClazz) {
+    super(operand(projClazz,
+            operand(joinClazz, any())));
+  }
+
+  public void onMatch(final RelOptRuleCall call) {
+    Project topProject = call.rel(0);
+    Join join = call.rel(1);
+
+    // 1. We check if it is a permutation project. If it is
+    //    not, or this is the identity, the rule will do nothing
+    final Permutation topPermutation = topProject.getPermutation();
+    if (topPermutation == null) {
+      return;
+    }
+    if (topPermutation.isIdentity()) {
+      return;
+    }
+
+    // 2. We swap the join
+    final RelNode swapped = JoinCommuteRule.swap(join,true);
+    if (swapped == null) {
+      return;
+    }
+
+    // 3. The result should have a project on top, otherwise we
+    //    bail out.
+    if (swapped instanceof Join) {
+      return;
+    }
+
+    // 4. We check if it is a permutation project. If it is
+    //    not, or this is the identity, the rule will do nothing
+    final Project bottomProject = (Project) swapped;
+    final Permutation bottomPermutation = bottomProject.getPermutation();
+    if (bottomPermutation == null) {
+      return;
+    }
+    if (bottomPermutation.isIdentity()) {
+      return;
+    }
+
+    // 5. If the product of the topPermutation and bottomPermutation yields
+    //    the identity, then we can swap the join and remove the project on
+    //    top.
+    final Permutation product = topPermutation.product(bottomPermutation);
+    if (!product.isIdentity()) {
+      return;
+    }
+
+    // 6. Return the new join as a replacement
+    final Join swappedJoin = (Join) bottomProject.getInput(0);
+    call.transformTo(swappedJoin);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 38f2014..347cd6d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -142,6 +142,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransp
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
@@ -862,7 +863,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       calciteOptimizedPlan = hepPlanner.findBestExp();
 
-      // run rule to fix windowing issue when it is done over
+      // 4. Run rule to try to remove projects on top of join operators
+      hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
+      hepPgmBldr.addRuleInstance(HiveJoinCommuteRule.INSTANCE);
+      hepPlanner = new HepPlanner(hepPgmBldr.build());
+      hepPlanner.registerMetadataProviders(list);
+      cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+      hepPlanner.setRoot(calciteOptimizedPlan);
+      calciteOptimizedPlan = hepPlanner.findBestExp();
+
+      // 5. Run rule to fix windowing issue when it is done over
       // aggregation columns (HIVE-10627)
       hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
       hepPgmBldr.addRuleInstance(HiveWindowingFixRule.INSTANCE);
@@ -872,8 +882,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       hepPlanner.setRoot(calciteOptimizedPlan);
       calciteOptimizedPlan = hepPlanner.findBestExp();
 
+      // 6. Run rules to aid in translation from Calcite tree to Hive tree
       if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
-        // run rules to aid in translation from Optiq tree -> Hive tree
+        // 6.1. Merge join into multijoin operators (if possible)
         hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
         hepPgmBldr.addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE);
         hepPlanner = new HepPlanner(hepPgmBldr.build());
@@ -882,6 +893,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
         hepPlanner.setRoot(calciteOptimizedPlan);
         calciteOptimizedPlan = hepPlanner.findBestExp();
 
+        // 6.2.  Introduce exchange operators below join/multijoin operators
         hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
         hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN);
         hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);

http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/auto_join12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out
index e97d7e6..7d8db0a 100644
--- a/ql/src/test/results/clientpositive/auto_join12.q.out
+++ b/ql/src/test/results/clientpositive/auto_join12.q.out
@@ -32,7 +32,7 @@ STAGE PLANS:
         $hdt$_0:$hdt$_0:$hdt$_0:src 
           Fetch Operator
             limit: -1
-        $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src 
+        $hdt$_0:$hdt$_1:$hdt$_1:src 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
@@ -51,7 +51,7 @@ STAGE PLANS:
                   keys:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
-        $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src 
+        $hdt$_0:$hdt$_1:$hdt$_1:src 
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
@@ -59,8 +59,8 @@ STAGE PLANS:
               predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 
80.0)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: string), value (type: string)
-                outputColumnNames: _col0, _col1
+                expressions: key (type: string)
+                outputColumnNames: _col0
                 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
                 HashTable Sink Operator
                   keys:
@@ -77,8 +77,8 @@ STAGE PLANS:
               predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 
80.0)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: string)
-                outputColumnNames: _col0
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
                 Map Join Operator
                   condition map:
@@ -86,33 +86,29 @@ STAGE PLANS:
                   keys:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
-                  outputColumnNames: _col1, _col2
+                  outputColumnNames: _col0, _col2
                   Statistics: Num rows: 60 Data size: 642 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col2
-                    Statistics: Num rows: 60 Data size: 642 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
-                      outputColumnNames: _col1, _col3
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col0 (type: string)
+                      1 _col0 (type: string)
+                    outputColumnNames: _col1, _col3
+                    Statistics: Num rows: 66 Data size: 706 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: hash(_col1,_col3) (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 66 Data size: 706 Basic stats: 
COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: hash(_col1,_col3) (type: int)
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
                         outputColumnNames: _col0
-                        Statistics: Num rows: 66 Data size: 706 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: sum(_col0)
-                          mode: hash
-                          outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
                           Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
+                          value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/auto_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out
index 69b7aab..3209d07 100644
--- a/ql/src/test/results/clientpositive/auto_join5.q.out
+++ b/ql/src/test/results/clientpositive/auto_join5.q.out
@@ -46,11 +46,11 @@ STAGE PLANS:
   Stage: Stage-5
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:$hdt$_1:src1 
+        $hdt$_0:$hdt$_0:src1 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_1:$hdt$_1:src1 
+        $hdt$_0:$hdt$_0:src1 
           TableScan
             alias: src1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
@@ -81,14 +81,14 @@ STAGE PLANS:
                 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
                 Map Join Operator
                   condition map:
-                       Left Outer Join0 to 1
+                       Right Outer Join0 to 1
                   keys:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 60 Data size: 642 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToInteger(_col2) (type: int), _col3 (type: 
string), UDFToInteger(_col0) (type: int), _col1 (type: string)
+                    expressions: UDFToInteger(_col0) (type: int), _col1 (type: 
string), UDFToInteger(_col2) (type: int), _col3 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 60 Data size: 642 Basic stats: 
COMPLETE Column stats: NONE
                     File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
index 40d2dd4..b52b475 100644
--- a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
+++ b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
@@ -80,20 +80,6 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-            GatherStats: false
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-              Reduce Output Operator
-                sort order: 
-                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-                tag: 0
-                value expressions: _col0 (type: string), _col1 (type: string)
-                auto parallelism: false
-          TableScan
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
@@ -108,9 +94,23 @@ STAGE PLANS:
                 Reduce Output Operator
                   sort order: 
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                  tag: 1
+                  tag: 0
                   value expressions: _col1 (type: string)
                   auto parallelism: false
+          TableScan
+            alias: b
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
+                tag: 1
+                value expressions: _col0 (type: string), _col1 (type: string)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -203,8 +203,8 @@ STAGE PLANS:
               name: default.src1
             name: default.src1
       Truncated Path -> Alias:
-        /src [$hdt$_1:a]
-        /src1 [$hdt$_0:b]
+        /src [$hdt$_0:a]
+        /src1 [$hdt$_1:b]
       Needs Tagging: true
       Reduce Operator Tree:
         Join Operator
@@ -213,10 +213,10 @@ STAGE PLANS:
           keys:
             0 
             1 
-          outputColumnNames: _col0, _col1, _col3
+          outputColumnNames: _col1, _col2, _col3
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
-            expressions: '429' (type: string), _col3 (type: string), _col0 
(type: string), _col1 (type: string)
+            expressions: '429' (type: string), _col1 (type: string), _col2 
(type: string), _col3 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ea663101/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
index d5f45da..43ba27d 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
@@ -23,14 +23,14 @@ JOIN src yy
 ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-4 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-4
+  Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -85,7 +85,7 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-4
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -109,10 +109,17 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: bigint)
+          TableScan
             alias: yy
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
@@ -127,13 +134,6 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -143,18 +143,14 @@ STAGE PLANS:
             1 _col0 (type: string)
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: _col1 (type: string), _col2 (type: bigint), _col0 
(type: string)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -259,20 +255,20 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: yy
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            alias: x
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
           TableScan
             alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
@@ -289,44 +285,23 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
           TableScan
-            alias: x
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
+            alias: yy
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Demux Operator
           Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE 
Column stats: NONE
-          Mux Operator
-            Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE 
Column stats: NONE
-            Join Operator
-              condition map:
-                   Inner Join 0 to 1
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
-              Select Operator
-                expressions: _col1 (type: string), _col2 (type: bigint), _col0 
(type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           Join Operator
             condition map:
                  Inner Join 0 to 1
@@ -353,17 +328,30 @@ STAGE PLANS:
                       1 _col0 (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                    Select Operator
-                      expressions: _col1 (type: string), _col2 (type: bigint), 
_col0 (type: string)
-                      outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          Mux Operator
+            Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE 
Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

[6/6] hive git commit: HIVE-11171: Join reordering algorithm might introduce projects between joins (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to