This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit b2aa6943b561a3b7691bed9fee7a1c337e99b253 Author: Zoltan Haindrich <k...@rxd.hu> AuthorDate: Thu Mar 14 07:53:49 2019 +0100 HIVE-21398: Columns which has estimated statistics should not be considered as unique keys (Zoltan Haindrich reviewed by Ashutosh Chauhan) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> --- .../calcite/stats/EstimateUniqueKeys.java | 26 +- .../runtime_skewjoin_mapjoin_spark.q.out | 284 ++++++++++----------- .../spark/runtime_skewjoin_mapjoin_spark.q.out | 44 ++-- .../spark/spark_dynamic_partition_pruning_6.q.out | 124 ++++----- 4 files changed, 239 insertions(+), 239 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java index 5ef945c..4aba098 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java @@ -102,19 +102,19 @@ public final class EstimateUniqueKeys { colStatsPos = 0; for (ColStatistics cStat : colStats) { boolean isKey = false; - if (cStat.getCountDistint() >= numRows) { - isKey = true; - } - if (!isKey && cStat.getRange() != null && - cStat.getRange().maxValue != null && - cStat.getRange().minValue != null) { - double r = cStat.getRange().maxValue.doubleValue() - - cStat.getRange().minValue.doubleValue() + 1; - isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); - } - if (isKey) { - ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); - keys.add(key); + if (!cStat.isEstimated()) { + if (cStat.getCountDistint() >= numRows) { + isKey = true; + } + if (!isKey && cStat.getRange() != null && cStat.getRange().maxValue != null + && cStat.getRange().minValue != null) { + double r = cStat.getRange().maxValue.doubleValue() - cStat.getRange().minValue.doubleValue() + 1; + isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); + } + if (isKey) { + ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); + keys.add(key); + } } colStatsPos++; } diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index 29dec2d..1883790 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,10 +35,12 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-12 is a root stage , consists of Stage-16, Stage-17, Stage-1 + Stage-18 is a root stage + Stage-13 depends on stages: Stage-18 + Stage-12 depends on stages: Stage-13 , consists of Stage-16, Stage-17, Stage-1 Stage-16 has a backup stage: Stage-1 Stage-10 depends on stages: Stage-16 - Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-13 , consists of Stage-14, Stage-15, Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10, Stage-11 , consists of Stage-14, Stage-15, Stage-2 Stage-14 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-14 Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 @@ -48,24 +50,38 @@ STAGE DEPENDENCIES: Stage-17 has a backup stage: Stage-1 Stage-11 depends on stages: Stage-17 Stage-1 - Stage-18 is a root stage - Stage-13 depends on stages: Stage-18 Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-12 - Conditional Operator - - Stage: Stage-16 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src2 + $hdt$_2:$hdt$_3:t1_n94 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src2 + $hdt$_2:$hdt$_3:t1_n94 TableScan - alias: src2 + alias: t1_n94 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: src filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -75,10 +91,40 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-10 Map Reduce @@ -101,7 +147,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -118,16 +164,26 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $hdt$_0:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $hdt$_0:src2 TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-7 Map Reduce @@ -139,7 +195,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -196,24 +252,34 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -226,13 +292,23 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -240,7 +316,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -256,11 +332,11 @@ STAGE PLANS: Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:src1 + $hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:src1 + $hdt$_1:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -281,30 +357,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -329,21 +395,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -352,7 +408,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -360,62 +416,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-18 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 - TableScan - alias: t1_n94 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out index b612e91..963b1d1 100644 --- a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out @@ -50,7 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 5 Map Operator Tree: TableScan alias: t1_n94 @@ -74,7 +74,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -99,25 +99,6 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan alias: src filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -136,7 +117,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 6 + 1 Map 5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -146,6 +127,25 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out index 8cc8353..a0d1000 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out @@ -174,27 +174,6 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: part_table_2 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col (type: int), part_col (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target Columns: [Map 4 -> [part_col:int (part_col)]] - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan alias: regular_table filterExpr: col is not null (type: boolean) Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE @@ -217,6 +196,27 @@ STAGE PLANS: Spark Partition Pruning Sink Operator Target Columns: [Map 1 -> [part_col:int (part_col)], Map 4 -> [part_col:int (part_col)]] Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: part_table_2 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 1 -> [part_col:int (part_col)]] + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Spark @@ -227,17 +227,17 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: part_table_2 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + alias: part_table_1 + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: col (type: int), part_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Map 3 Map Operator Tree: @@ -260,17 +260,17 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: part_table_1 - Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + alias: part_table_2 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: col (type: int), part_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 2 Reduce Operator Tree: @@ -285,7 +285,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col0 (type: int), _col1 (type: int) + expressions: _col2 (type: int), _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -378,34 +378,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_table_2 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col (type: int), part_col (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - 2 _col1 (type: int) - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target Columns: [Map 3 -> [part_col:int (part_col)]] - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work Map 2 Map Operator Tree: TableScan @@ -434,16 +406,44 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target Columns: [Map 3 -> [part_col:int (part_col)]] + Target Columns: [Map 1 -> [part_col:int (part_col)]] Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work + Map 3 + Map Operator Tree: + TableScan + alias: part_table_2 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col1 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 1 -> [part_col:int (part_col)]] + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: part_table_1 @@ -462,11 +462,11 @@ STAGE PLANS: 2 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 0 Map 1 1 Map 2 + 2 Map 3 Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col0 (type: int), _col1 (type: int) + expressions: _col2 (type: int), _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator