[5/5] hive git commit: HIVE-11954: Extend logic to choose side table in MapJoin Conversion algorithm (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

jcamacho Fri, 23 Oct 2015 08:31:41 -0700

HIVE-11954: Extend logic to choose side table in MapJoin Conversion algorithm 
(Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e62edac
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e62edac
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e62edac

Branch: refs/heads/master
Commit: 8e62edac34538d38d3ba4db158481f5d7735199f
Parents: 37a8fe0
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Thu Oct 22 13:43:14 2015 -0700
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Fri Oct 23 08:30:15 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/OperatorUtils.java      |   21 +
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |   99 +-
 .../llap/bucket_map_join_tez1.q.out             |  244 ++--
 .../llap/bucket_map_join_tez2.q.out             |  112 +-
 .../llap/dynamic_partition_pruning.q.out        |  130 ++-
 .../clientpositive/llap/explainuser_2.q.out     | 1070 +++++++++---------
 .../test/results/clientpositive/llap/mrr.q.out  |   53 +-
 .../vectorized_dynamic_partition_pruning.q.out  |  130 ++-
 .../tez/auto_sortmerge_join_10.q.out            |   57 +-
 .../tez/auto_sortmerge_join_12.q.out            |   95 +-
 .../tez/bucket_map_join_tez1.q.out              |  236 ++--
 .../tez/bucket_map_join_tez2.q.out              |  108 +-
 .../tez/cross_product_check_2.q.out             |  195 ++--
 .../tez/dynamic_partition_pruning.q.out         |  128 ++-
 .../clientpositive/tez/explainuser_2.q.out      | 1070 +++++++++---------
 .../test/results/clientpositive/tez/mrr.q.out   |   53 +-
 .../clientpositive/tez/unionDistinct_1.q.out    |   58 +-
 .../vectorized_dynamic_partition_pruning.q.out  |  130 ++-
 18 files changed, 2004 insertions(+), 1985 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index f00fc77..bd10912 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -277,4 +277,25 @@ public class OperatorUtils {
     }
     return resultMap.build();
   }
+
+  /**
+   * Given an operator and a set of classes, it returns the number of 
operators it finds
+   * upstream that instantiate any of the given classes.
+   *
+   * @param start the start operator
+   * @param classes the set of classes
+   * @return the number of operators
+   */
+  public static int countOperatorsUpstream(Operator<?> start, Set<Class<? 
extends Operator<?>>> classes) {
+    Multimap<Class<? extends Operator<?>>, Operator<?>> ops = 
classifyOperatorsUpstream(start, classes);
+    int numberOperators = 0;
+    Set<Operator<?>> uniqueOperators = new HashSet<Operator<?>>();
+    for (Operator<?> op : ops.values()) {
+      if (uniqueOperators.add(op)) {
+        numberOperators++;
+      }
+    }
+    return numberOperators;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 024849e..e63de7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -32,18 +32,22 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
 import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MuxOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -61,6 +65,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.util.ReflectionUtils;
 
+import com.google.common.collect.ImmutableSet;
+
 /**
  * ConvertJoinMapJoin is an optimization that replaces a common join
  * (aka shuffle join) with a map join (aka broadcast or fragment replicate
@@ -70,7 +76,18 @@ import org.apache.hadoop.util.ReflectionUtils;
  */
 public class ConvertJoinMapJoin implements NodeProcessor {
 
-  static final private Log LOG = 
LogFactory.getLog(ConvertJoinMapJoin.class.getName());
+  private static final Log LOG = 
LogFactory.getLog(ConvertJoinMapJoin.class.getName());
+
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private static final Set<Class<? extends Operator<?>>> COSTLY_OPERATORS =
+          new ImmutableSet.Builder()
+                  .add(CommonJoinOperator.class)
+                  .add(GroupByOperator.class)
+                  .add(LateralViewJoinOperator.class)
+                  .add(PTFOperator.class)
+                  .add(ReduceSinkOperator.class)
+                  .add(UDTFOperator.class)
+                  .build();
 
   @Override
   /*
@@ -538,16 +555,20 @@ public class ConvertJoinMapJoin implements NodeProcessor {
         HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
 
     int bigTablePosition = -1;
-
+    // number of costly ops (Join, GB, PTF/Windowing, TF) below the big input
+    int bigInputNumberCostlyOps = -1;
+    // stats of the big input
     Statistics bigInputStat = null;
-    long totalSize = 0;
-    int pos = 0;
 
     // bigTableFound means we've encountered a table that's bigger than the
     // max. This table is either the the big table or we cannot convert.
-    boolean bigTableFound = false;
+    boolean foundInputNotFittingInMemory = false;
 
-    for (Operator<? extends OperatorDesc> parentOp : 
joinOp.getParentOperators()) {
+    // total size of the inputs
+    long totalSize = 0;
+
+    for (int pos = 0; pos < joinOp.getParentOperators().size(); pos++) {
+      Operator<? extends OperatorDesc> parentOp = 
joinOp.getParentOperators().get(pos);
 
       Statistics currInputStat = parentOp.getStatistics();
       if (currInputStat == null) {
@@ -556,15 +577,17 @@ public class ConvertJoinMapJoin implements NodeProcessor {
       }
 
       long inputSize = currInputStat.getDataSize();
+
+      boolean currentInputNotFittingInMemory = false;
       if ((bigInputStat == null)
-          || ((bigInputStat != null) && (inputSize > 
bigInputStat.getDataSize()))) {
+              || ((bigInputStat != null) && (inputSize > 
bigInputStat.getDataSize()))) {
 
-        if (bigTableFound) {
+        if (foundInputNotFittingInMemory) {
           // cannot convert to map join; we've already chosen a big table
           // on size and there's another one that's bigger.
           return -1;
         }
-
+        
         if (inputSize/buckets > maxSize) {
           if (!bigTableCandidateSet.contains(pos)) {
             // can't use the current table as the big table, but it's too
@@ -572,33 +595,46 @@ public class ConvertJoinMapJoin implements NodeProcessor {
             return -1;
           }
 
-          bigTableFound = true;
+          currentInputNotFittingInMemory = true;
+          foundInputNotFittingInMemory = true;
         }
+      }
 
-        if (bigInputStat != null) {
-          // we're replacing the current big table with a new one. Need
-          // to count the current one as a map table then.
-          totalSize += bigInputStat.getDataSize();
-        }
+      int currentInputNumberCostlyOps = foundInputNotFittingInMemory ?
+              -1 : OperatorUtils.countOperatorsUpstream(parentOp, 
COSTLY_OPERATORS);
+
+      // This input is the big table if it is contained in the big candidates 
set, and either:
+      // 1) we have not chosen a big table yet, or
+      // 2) it has been chosen as the big table above, or
+      // 3) the number of costly operators for this input is higher, or
+      // 4) the number of costly operators is equal, but the size is bigger,
+      boolean selectedBigTable = bigTableCandidateSet.contains(pos) &&
+              (bigInputStat == null || currentInputNotFittingInMemory ||
+                      (!foundInputNotFittingInMemory && 
(currentInputNumberCostlyOps > bigInputNumberCostlyOps ||
+                              (currentInputNumberCostlyOps == 
bigInputNumberCostlyOps && inputSize > bigInputStat.getDataSize()))));
+
+      if (bigInputStat != null && selectedBigTable) {
+        // We are replacing the current big table with a new one, thus
+        // we need to count the current one as a map table then.
+        totalSize += bigInputStat.getDataSize();
+      } else if (!selectedBigTable) {
+        // This is not the first table and we are not using it as big table,
+        // in fact, we're adding this table as a map table
+        totalSize += inputSize;
+      }
 
-        if (totalSize/buckets > maxSize) {
-          // sum of small tables size in this join exceeds configured limit
-          // hence cannot convert.
-          return -1;
-        }
+      if (totalSize/buckets > maxSize) {
+        // sum of small tables size in this join exceeds configured limit
+        // hence cannot convert.
+        return -1;
+      }
 
-        if (bigTableCandidateSet.contains(pos)) {
-          bigTablePosition = pos;
-          bigInputStat = currInputStat;
-        }
-      } else {
-        totalSize += currInputStat.getDataSize();
-        if (totalSize/buckets > maxSize) {
-          // cannot hold all map tables in memory. Cannot convert.
-          return -1;
-        }
+      if (selectedBigTable) {
+        bigTablePosition = pos;
+        bigInputNumberCostlyOps = currentInputNumberCostlyOps;
+        bigInputStat = currInputStat;
       }
-      pos++;
+
     }
 
     return bigTablePosition;
@@ -616,7 +652,6 @@ public class ConvertJoinMapJoin implements NodeProcessor {
    *
    * for tez.
    */
-
   public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, 
OptimizeTezProcContext context,
       int bigTablePosition, boolean removeReduceSink) throws SemanticException 
{
     // bail on mux operator because currently the mux operator masks the emit 
keys

http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index 4699e10..1f1bf3d 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -329,8 +329,7 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 3 <- Reducer 2 (CUSTOM_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -362,31 +361,15 @@ STAGE PLANS:
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 key (type: int)
-                      outputColumnNames: _col0, _col1, _col3
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Select Operator
-                        expressions: _col1 (type: int), _col0 (type: double), 
_col3 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 133 Data size: 1411 Basic 
stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.TextInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -398,12 +381,28 @@ STAGE PLANS:
                   expressions: _col1 (type: double), _col0 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE 
Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 7 Data size: 728 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: double)
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col1 (type: int)
+                      1 key (type: int)
+                    outputColumnNames: _col0, _col1, _col3
+                    input vertices:
+                      1 Map 3
+                    Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
+                    Select Operator
+                      expressions: _col1 (type: int), _col0 (type: double), 
_col3 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.TextInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -556,8 +555,7 @@ STAGE PLANS:
     Tez
       Edges:
         Map 1 <- Map 3 (CUSTOM_EDGE)
-        Map 4 <- Reducer 2 (CUSTOM_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -614,31 +612,15 @@ STAGE PLANS:
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col1 (type: int)
-                        1 key (type: int)
-                      outputColumnNames: _col0, _col1, _col3
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Select Operator
-                        expressions: _col1 (type: int), _col0 (type: double), 
_col3 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.TextInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -650,12 +632,28 @@ STAGE PLANS:
                   expressions: _col1 (type: double), _col0 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 66 Data size: 700 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 66 Data size: 700 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: double)
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col1 (type: int)
+                      1 key (type: int)
+                    outputColumnNames: _col0, _col1, _col3
+                    input vertices:
+                      1 Map 4
+                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
+                    Select Operator
+                      expressions: _col1 (type: int), _col0 (type: double), 
_col3 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.TextInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -871,8 +869,7 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 3 <- Reducer 2 (CUSTOM_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -904,31 +901,15 @@ STAGE PLANS:
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 key (type: int)
-                      outputColumnNames: _col0, _col1, _col3
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Select Operator
-                        expressions: _col0 (type: int), _col1 (type: double), 
_col3 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.TextInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -936,12 +917,28 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: double)
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 key (type: int)
+                  outputColumnNames: _col0, _col1, _col3
+                  input vertices:
+                    1 Map 3
+                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: double), 
_col3 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -967,8 +964,7 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 3 <- Reducer 2 (CUSTOM_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -994,31 +990,15 @@ STAGE PLANS:
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 key (type: int)
-                      outputColumnNames: _col0, _col1, _col3
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Select Operator
-                        expressions: _col0 (type: int), _col1 (type: double), 
_col3 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.TextInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -1026,12 +1006,28 @@ STAGE PLANS:
                 mode: complete
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: double)
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 key (type: int)
+                  outputColumnNames: _col0, _col1, _col3
+                  input vertices:
+                    1 Map 3
+                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: double), 
_col3 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index 111aaaa..68d1253 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -527,8 +527,7 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 3 <- Reducer 2 (CUSTOM_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -558,38 +557,38 @@ STAGE PLANS:
                   Filter Operator
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 _col0 (type: int)
-                        1 key (type: int)
-                      outputColumnNames: _col0, _col1
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                        table:
-                            input format: 
org.apache.hadoop.mapred.TextInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 60 Data size: 636 Basic stats: 
COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 key (type: int)
+                  outputColumnNames: _col0, _col1
+                  input vertices:
+                    1 Map 3
+                  Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -609,8 +608,7 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 3 <- Reducer 2 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -640,43 +638,43 @@ STAGE PLANS:
                   Filter Operator
                     predicate: UDFToDouble(key) is not null (type: boolean)
                     Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 UDFToDouble(_col0) (type: double)
-                        1 UDFToDouble(key) (type: double)
-                      outputColumnNames: _col0, _col2
-                      input vertices:
-                        0 Reducer 2
-                      Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Select Operator
-                        expressions: _col0 (type: string), _col2 (type: string)
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 133 Data size: 1411 Basic 
stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.TextInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Reduce Output Operator
+                      key expressions: UDFToDouble(key) (type: double)
+                      sort order: +
+                      Map-reduce partition columns: UDFToDouble(key) (type: 
double)
+                      Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
             Execution mode: llap
         Reducer 2 
-            Execution mode: llap
+            Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: UDFToDouble(_col0) (type: double)
-                  sort order: +
-                  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
-                  Statistics: Num rows: 60 Data size: 636 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string)
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 UDFToDouble(_col0) (type: double)
+                    1 UDFToDouble(key) (type: double)
+                  outputColumnNames: _col0, _col2
+                  input vertices:
+                    1 Map 3
+                  Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 3ebd690..4320f01 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -4262,7 +4262,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
-Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross 
product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from 
srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
@@ -4277,9 +4277,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 1 <- Reducer 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -4290,26 +4289,11 @@ STAGE PLANS:
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 
-                        1 
-                      input vertices:
-                        1 Reducer 4
-                      Statistics: Num rows: 1100 Data size: 11686 Basic stats: 
COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: llap
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -4330,7 +4314,35 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1000 Data size: 10624 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: llap
-        Reducer 2 
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 
+                      1 
+                    input vertices:
+                      0 Map 1
+                    Statistics: Num rows: 1100 Data size: 11686 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint)
+        Reducer 4 
             Execution mode: uber
             Reduce Operator Tree:
               Group By Operator
@@ -4345,19 +4357,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -4365,7 +4364,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross 
product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as 
`date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = 
'2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -4812,9 +4811,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 2 <- Map 1 (BROADCAST_EDGE)
-        Map 3 <- Map 2 (BROADCAST_EDGE)
-        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+        Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -4851,13 +4849,27 @@ STAGE PLANS:
                         0 Map 1
                       Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
                       HybridGraceHashJoin: true
-                      Reduce Output Operator
-                        key expressions: '13' (type: string)
-                        sort order: +
-                        Map-reduce partition columns: '13' (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 '13' (type: string)
+                          1 '13' (type: string)
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
             Execution mode: llap
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
@@ -4866,27 +4878,13 @@ STAGE PLANS:
                   Filter Operator
                     predicate: (hr = 13) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 '13' (type: string)
-                        1 '13' (type: string)
-                      input vertices:
-                        0 Map 2
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
-                      HybridGraceHashJoin: true
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
+                    Reduce Output Operator
+                      key expressions: '13' (type: string)
+                      sort order: +
+                      Map-reduce partition columns: '13' (type: string)
+                      Statistics: Num rows: 1 Data size: 5 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: llap
-        Reducer 4 
+        Reducer 3 
             Execution mode: uber
             Reduce Operator Tree:
               Group By Operator

[5/5] hive git commit: HIVE-11954: Extend logic to choose side table in MapJoin Conversion algorithm (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

Reply via email to