HIVE-17538 : Enhance estimation of stats to estimate even if only one column is missing stats (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5d5b63dc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5d5b63dc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5d5b63dc Branch: refs/heads/hive-14535 Commit: 5d5b63dc458f60d4afe80a9596946bc19f8a048f Parents: bb26d03 Author: Vineet Garg <[email protected]> Authored: Mon Sep 25 13:57:55 2017 -0700 Committer: Vineet Garg <[email protected]> Committed: Mon Sep 25 13:57:55 2017 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 38 +++++++--- .../clientpositive/annotate_stats_groupby.q.out | 22 +++--- .../annotate_stats_groupby2.q.out | 12 ++-- .../annotate_stats_join_pkfk.q.out | 12 ++-- .../clientpositive/annotate_stats_select.q.out | 8 +-- .../clientpositive/annotate_stats_table.q.out | 4 +- .../cbo_rp_annotate_stats_groupby.q.out | 22 +++--- .../clientpositive/filter_join_breaktask.q.out | 60 ++++++++-------- .../clientpositive/llap/explainuser_2.q.out | 58 +++++++-------- .../llap/filter_join_breaktask.q.out | 76 ++++++++++---------- .../clientpositive/ppd_repeated_alias.q.out | 52 +++++++------- .../spark/filter_join_breaktask.q.out | 48 ++++++------- .../clientpositive/tez/explainanalyze_2.q.out | 58 +++++++-------- 13 files changed, 250 insertions(+), 220 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 17d9f2d..778c918 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -35,6 +35,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -277,7 +278,28 @@ public class StatsUtils { } } - private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns, Table table, long ds) { + private static void estimateStatsForMissingCols(List<String> neededColumns, List<ColStatistics> columnStats, + Table table, HiveConf conf, long nr, List<ColumnInfo> schema) { + + Set<String> neededCols = new HashSet<>(neededColumns); + Set<String> colsWithStats = new HashSet<>(); + + for (ColStatistics cstats : columnStats) { + colsWithStats.add(cstats.getColumnName()); + } + + List<String> missingColStats = new ArrayList<String>(Sets.difference(neededCols, colsWithStats)); + + if(missingColStats.size() > 0) { + List<ColStatistics> estimatedColStats = estimateStats(table, schema, missingColStats, conf, nr); + for (ColStatistics estColStats : estimatedColStats) { + columnStats.add(estColStats); + } + } + } + + private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns, + Table table, long ds) { long nr = getNumRows(table); // number of rows -1 means that statistics from metastore is not reliable // and 0 means statistics gathering is disabled @@ -322,9 +344,13 @@ public class StatsUtils { List<ColStatistics> colStats = Lists.newArrayList(); if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); - if(colStats == null || colStats.size() < 1) { - colStats = estimateStats(table,schema,neededColumns, conf, nr); + if(colStats == null) { + colStats = Lists.newArrayList(); } + estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); + + // we should have stats for all columns (estimated or actual) + assert(neededColumns.size() == colStats.size()); long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } @@ -457,15 +483,11 @@ public class StatsUtils { aggrStats.getColStats() != null && aggrStats.getColStatsSize() != 0; if (neededColumns.size() == 0 || (neededColsToRetrieve.size() > 0 && !statsRetrieved)) { + estimateStatsForMissingCols(neededColsToRetrieve, columnStats, table, conf, nr, schema); // There are some partitions with no state (or we didn't fetch any state). // Update the stats with empty list to reflect that in the // state/initialize structures. - if(columnStats.isEmpty()) { - // estimate stats - columnStats = estimateStats(table, schema, neededColumns, conf, nr); - } - // add partition column stats addPartitionColumnStats(conf, partitionColsToRetrieve, schema, table, partList, columnStats); http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index f9a1eb8..cd4b0ad 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,7 +129,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) @@ -743,30 +743,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed1..4986879 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -196,30 +196,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: location - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), votes (type: bigint) outputColumnNames: state, votes - Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), votes (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index cdb9cea..a73e34d 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -559,19 +559,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL TableScan alias: ss Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE @@ -595,10 +595,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index dec7f40..7f5b832 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -132,11 +132,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map<string,string>), l1 (type: array<int>), st1 (type: struct<c1:int,c2:string>) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select bo1 from alltypes_orc @@ -670,11 +670,11 @@ STAGE PLANS: Processor Tree: TableScan alias: alltypes_orc - Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: map<string,string>), l1 (type: array<int>), st1 (type: struct<c1:int,c2:string>), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select i1 from (select i1 from alltypes_orc limit 10) temp http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index 5d443f1..ff7b403 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -139,11 +139,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: explain select deptid from emp_orc http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index 88b5d84..a603cc6 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,7 +129,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) @@ -767,30 +767,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/filter_join_breaktask.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b636..9d00ce6 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -38,22 +38,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false @@ -70,13 +70,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -129,16 +129,16 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g, $hdt$_1:m] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -150,8 +150,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -165,31 +165,31 @@ STAGE PLANS: TableScan GatherStats: false Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: false TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -203,8 +203,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -213,8 +213,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col3 - columns.types int,string + columns _col0,_col2 + columns.types string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -274,12 +274,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/llap/explainuser_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index e43f736..e5ba529 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -1804,34 +1804,36 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) - Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:key is not null - TableScan [TS_3] (rows=242 width=10) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=10) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=242 width=10) - Output:["_col1"] - Filter Operator [FIL_24] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Select Operator [SEL_15] (rows=292 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) + Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242 width=10) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:key is not null + TableScan [TS_0] (rows=242 width=10) + default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242 width=10) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index 1bff9ea..ecc2246 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -44,23 +44,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + alias: g + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: true Execution mode: llap @@ -116,7 +116,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] + /filter_join_breaktask/ds=2008-04-08 [g] Map 4 Map Operator Tree: TableScan @@ -132,13 +132,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -197,23 +197,23 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: g - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + alias: f + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: true Execution mode: llap @@ -269,7 +269,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] + /filter_join_breaktask/ds=2008-04-08 [f] Reducer 2 Execution mode: llap Needs Tagging: false @@ -278,19 +278,19 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Position of Big Table: 1 - Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Position of Big Table: 0 + Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: true Reducer 3 Execution mode: llap @@ -300,21 +300,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index 738424b..c94002f 100644 --- a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -257,25 +257,25 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 PREHOOK: type: QUERY POSTHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: foo is not null (type: boolean) + predicate: ((bar = 2) and foo is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) @@ -287,10 +287,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((bar = 2) and foo is not null) (type: boolean) + predicate: foo is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: foo (type: int) @@ -308,24 +308,23 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: d Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -339,6 +338,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -346,10 +350,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col2 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 718346f..320a52e 100644 --- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -43,22 +43,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false @@ -113,7 +113,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] Map 4 Map Operator Tree: TableScan @@ -129,13 +129,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -192,22 +192,22 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -262,7 +262,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -270,18 +270,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col2 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) auto parallelism: false Reducer 3 Needs Tagging: true @@ -290,12 +290,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col5 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col5 (type: string) + expressions: _col5 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index 54d1ce3..546ae60 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -776,34 +776,36 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) - Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=230/242 width=3) - Output:["_col0"] - Filter Operator [FIL_23] (rows=230/242 width=3) - predicate:key is not null - TableScan [TS_3] (rows=242/242 width=3) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=218/242 width=179) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=218/242 width=179) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=179) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=230/242 width=175) - Output:["_col1"] - Filter Operator [FIL_24] (rows=230/242 width=175) - predicate:value is not null - TableScan [TS_6] (rows=242/242 width=175) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Select Operator [SEL_15] (rows=278/1166 width=3) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) + Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] + <-Select Operator [SEL_5] (rows=218/242 width=179) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=218/242 width=179) + predicate:(key is not null and value is not null) + TableScan [TS_3] (rows=242/242 width=179) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=230/242 width=3) + Output:["_col0"] + Filter Operator [FIL_22] (rows=230/242 width=3) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=3) + default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=230/242 width=175) + Output:["_col1"] + Filter Operator [FIL_24] (rows=230/242 width=175) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=175) + default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL
