[2/2] hive git commit: HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)

jcamacho Sat, 13 Oct 2018 12:45:01 -0700

HIVE-20702: Account for overhead from datastructure aware estimations during 
mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87414f37
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87414f37
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87414f37

Branch: refs/heads/master
Commit: 87414f37eb6f2e2b437bf5df981335ce0b5a7a53
Parents: f0b76e2
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Sat Oct 13 12:44:38 2018 -0700
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Sat Oct 13 12:44:38 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |  41 +-
 .../llap/auto_sortmerge_join_13.q.out           | 117 ++-
 .../llap/bucket_map_join_tez2.q.out             | 155 ++--
 .../clientpositive/llap/join32_lessSize.q.out   | 863 +++++++++----------
 .../llap/join_max_hashtable.q.out               |  97 ++-
 .../clientpositive/llap/unionDistinct_1.q.out   |  82 +-
 6 files changed, 683 insertions(+), 672 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 5217208..b4cc76a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -68,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.OpTraits;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -348,19 +350,54 @@ public class ConvertJoinMapJoin implements NodeProcessor {
 
 
   public long computeOnlineDataSizeGeneric(Statistics statistics, long 
overHeadPerRow, long overHeadPerSlot) {
-
     long onlineDataSize = 0;
     long numRows = statistics.getNumRows();
     if (numRows <= 0) {
       numRows = 1;
     }
     long worstCaseNeededSlots = 1L << DoubleMath.log2(numRows / 
hashTableLoadFactor, RoundingMode.UP);
-    onlineDataSize += statistics.getDataSize();
+    onlineDataSize += statistics.getDataSize() - 
hashTableDataSizeAdjustment(numRows, statistics.getColumnStats());
     onlineDataSize += overHeadPerRow * statistics.getNumRows();
     onlineDataSize += overHeadPerSlot * worstCaseNeededSlots;
     return onlineDataSize;
   }
 
+  /**
+   * In data calculation logic, we include some overhead due to java object 
refs, etc.
+   * However, this overhead may be different when storing values in hashtable 
for mapjoin.
+   * Hence, we calculate a size adjustment to the original data size for a 
given input.
+   */
+  private static long hashTableDataSizeAdjustment(long numRows, 
List<ColStatistics> colStats) {
+    long result = 0;
+
+    if (numRows <= 0 || colStats == null || colStats.isEmpty()) {
+      return result;
+    }
+
+    for (ColStatistics cs : colStats) {
+      if (cs != null) {
+        String colTypeLowerCase = cs.getColumnType().toLowerCase();
+        long nonNullCount = cs.getNumNulls() > 0 ? numRows - cs.getNumNulls() 
+ 1 : numRows;
+        double overhead = 0;
+        if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+            || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+            || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+          overhead = JavaDataModel.get().lengthForStringOfLength(0);
+        } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
+          overhead = JavaDataModel.get().lengthForByteArrayOfSize(0);
+        } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) 
||
+            colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME) 
||
+            colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME) ||
+            colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
+          overhead = JavaDataModel.get().object();
+        }
+        result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, 
overhead), result);
+      }
+    }
+
+    return result;
+  }
+
   @VisibleForTesting
   public MemoryMonitorInfo getMemoryMonitorInfo(
                                                 final HiveConf conf,

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out 
b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index 1a28ce8..a8bc8b8 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -87,6 +87,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -94,18 +95,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -116,13 +105,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -175,6 +166,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic 
stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -368,6 +381,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -375,18 +389,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -397,13 +399,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -456,6 +460,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic 
stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -649,6 +675,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -656,18 +683,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -678,13 +693,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -737,6 +754,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic 
stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index bbc06fa..8fe30f0 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -481,7 +481,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Map 2 <- Map 1 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -505,7 +505,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int), _col1 (type: 
string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -518,36 +518,29 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
-                        Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 UDFToDouble(_col0) (type: double)
+                          1 UDFToDouble(_col0) (type: double)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: 
string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 UDFToDouble(_col0) (type: double)
-                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -578,7 +571,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Map 2 <- Map 1 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -602,7 +595,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int), _col1 (type: 
string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -615,36 +608,29 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
-                        Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 UDFToDouble(_col0) (type: double)
+                          1 UDFToDouble(_col0) (type: double)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: 
string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 382 Data size: 71052 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 UDFToDouble(_col0) (type: double)
-                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 382 Data size: 71052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -1636,9 +1622,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1662,7 +1647,7 @@ STAGE PLANS:
                         Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -1690,31 +1675,25 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int), _col1 (type: string)
-                  1 _col0 (type: int), _col1 (type: string)
-                Statistics: Num rows: 242 Data size: 1936 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Reduce Output Operator
-                    sort order: 
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int), _col1 (type: string)
+                    1 _col0 (type: int), _col1 (type: string)
+                  input vertices:
+                    1 Map 4
+                  Statistics: Num rows: 242 Data size: 1936 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
-                    value expressions: _col0 (type: bigint)
-        Reducer 4 
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator

[2/2] hive git commit: HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)

Reply via email to