HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87414f37 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87414f37 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87414f37 Branch: refs/heads/master Commit: 87414f37eb6f2e2b437bf5df981335ce0b5a7a53 Parents: f0b76e2 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Sat Oct 13 12:44:38 2018 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Sat Oct 13 12:44:38 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 41 +- .../llap/auto_sortmerge_join_13.q.out | 117 ++- .../llap/bucket_map_join_tez2.q.out | 155 ++-- .../clientpositive/llap/join32_lessSize.q.out | 863 +++++++++---------- .../llap/join_max_hashtable.q.out | 97 ++- .../clientpositive/llap/unionDistinct_1.q.out | 82 +- 6 files changed, 683 insertions(+), 672 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 5217208..b4cc76a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -68,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.OpTraits; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -348,19 +350,54 @@ public class ConvertJoinMapJoin implements NodeProcessor { public long computeOnlineDataSizeGeneric(Statistics statistics, long overHeadPerRow, long overHeadPerSlot) { - long onlineDataSize = 0; long numRows = statistics.getNumRows(); if (numRows <= 0) { numRows = 1; } long worstCaseNeededSlots = 1L << DoubleMath.log2(numRows / hashTableLoadFactor, RoundingMode.UP); - onlineDataSize += statistics.getDataSize(); + onlineDataSize += statistics.getDataSize() - hashTableDataSizeAdjustment(numRows, statistics.getColumnStats()); onlineDataSize += overHeadPerRow * statistics.getNumRows(); onlineDataSize += overHeadPerSlot * worstCaseNeededSlots; return onlineDataSize; } + /** + * In data calculation logic, we include some overhead due to java object refs, etc. + * However, this overhead may be different when storing values in hashtable for mapjoin. + * Hence, we calculate a size adjustment to the original data size for a given input. + */ + private static long hashTableDataSizeAdjustment(long numRows, List<ColStatistics> colStats) { + long result = 0; + + if (numRows <= 0 || colStats == null || colStats.isEmpty()) { + return result; + } + + for (ColStatistics cs : colStats) { + if (cs != null) { + String colTypeLowerCase = cs.getColumnType().toLowerCase(); + long nonNullCount = cs.getNumNulls() > 0 ? numRows - cs.getNumNulls() + 1 : numRows; + double overhead = 0; + if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) { + overhead = JavaDataModel.get().lengthForStringOfLength(0); + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) { + overhead = JavaDataModel.get().lengthForByteArrayOfSize(0); + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME) || + colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { + overhead = JavaDataModel.get().object(); + } + result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, overhead), result); + } + } + + return result; + } + @VisibleForTesting public MemoryMonitorInfo getMemoryMonitorInfo( final HiveConf conf, http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index 1a28ce8..a8bc8b8 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -87,6 +87,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -94,18 +95,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE @@ -116,13 +105,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -175,6 +166,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -368,6 +381,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -375,18 +389,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE @@ -397,13 +399,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -456,6 +460,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -649,6 +675,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -656,18 +683,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE @@ -678,13 +693,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -737,6 +754,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index bbc06fa..8fe30f0 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -481,7 +481,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -505,7 +505,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -518,36 +518,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -578,7 +571,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -602,7 +595,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -615,36 +608,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1636,9 +1622,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1662,7 +1647,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b @@ -1690,31 +1675,25 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator