backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;' (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9eb95813 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9eb95813 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9eb95813 Branch: refs/heads/branch-1.0 Commit: 9eb95813a0b58601642fe9293d7cea8cbb0a2215 Parents: 301de83 Author: Pengcheng Xiong <pxi...@apache.org> Authored: Thu Aug 13 23:16:32 2015 -0700 Committer: Pengcheng Xiong <pxi...@apache.org> Committed: Thu Aug 13 23:16:32 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/StatsNoJobTask.java | 8 +- .../hive/ql/optimizer/GenMRTableScan1.java | 17 +- .../hive/ql/parse/ProcessAnalyzeTable.java | 17 +- .../hadoop/hive/ql/plan/StatsNoJobWork.java | 10 + .../test/queries/clientpositive/orc_analyze.q | 28 +- .../clientpositive/annotate_stats_part.q.out | 52 +- .../clientpositive/annotate_stats_table.q.out | 20 +- .../results/clientpositive/limit_pushdown.q.out | 48 +- .../results/clientpositive/orc_analyze.q.out | 998 ++++++++++++++++--- .../clientpositive/tez/limit_pushdown.q.out | 48 +- .../clientpositive/tez/orc_analyze.q.out | 998 ++++++++++++++++--- .../clientpositive/tez/vector_char_simple.q.out | 16 +- .../tez/vector_left_outer_join.q.out | 16 +- .../tez/vector_varchar_simple.q.out | 16 +- .../clientpositive/tez/vectorization_0.q.out | 42 +- .../clientpositive/tez/vectorization_13.q.out | 36 +- .../clientpositive/tez/vectorization_14.q.out | 20 +- .../clientpositive/tez/vectorization_15.q.out | 20 +- .../clientpositive/tez/vectorization_16.q.out | 16 +- .../clientpositive/tez/vectorization_7.q.out | 20 +- .../clientpositive/tez/vectorization_8.q.out | 20 +- .../clientpositive/tez/vectorization_9.q.out | 16 +- .../clientpositive/tez/vectorization_div0.q.out | 28 +- .../tez/vectorization_limit.q.out | 84 +- .../tez/vectorization_pushdown.q.out | 6 +- .../tez/vectorization_short_regress.q.out | 152 +-- .../tez/vectorized_distinct_gby.q.out | 8 +- .../clientpositive/tez/vectorized_mapjoin.q.out | 14 +- .../tez/vectorized_nested_mapjoin.q.out | 26 +- .../tez/vectorized_shufflejoin.q.out | 16 +- .../clientpositive/vector_char_simple.q.out | 16 +- .../clientpositive/vector_coalesce.q.out | 40 +- .../clientpositive/vector_decimal_cast.q.out | 10 +- .../results/clientpositive/vector_elt.q.out | 12 +- .../results/clientpositive/vector_if_expr.q.out | 12 +- .../clientpositive/vector_left_outer_join.q.out | 12 +- .../clientpositive/vector_varchar_simple.q.out | 16 +- .../clientpositive/vectorization_0.q.out | 42 +- .../clientpositive/vectorization_13.q.out | 36 +- .../clientpositive/vectorization_14.q.out | 20 +- .../clientpositive/vectorization_15.q.out | 20 +- .../clientpositive/vectorization_16.q.out | 16 +- .../clientpositive/vectorization_7.q.out | 20 +- .../clientpositive/vectorization_8.q.out | 20 +- .../clientpositive/vectorization_9.q.out | 16 +- .../clientpositive/vectorization_div0.q.out | 36 +- .../clientpositive/vectorization_limit.q.out | 94 +- .../clientpositive/vectorization_pushdown.q.out | 6 +- .../vectorization_short_regress.q.out | 152 +-- .../clientpositive/vectorized_case.q.out | 8 +- .../clientpositive/vectorized_casts.q.out | 8 +- .../vectorized_distinct_gby.q.out | 8 +- .../clientpositive/vectorized_mapjoin.q.out | 12 +- .../clientpositive/vectorized_math_funcs.q.out | 8 +- .../vectorized_nested_mapjoin.q.out | 22 +- .../clientpositive/vectorized_shufflejoin.q.out | 16 +- .../vectorized_string_funcs.q.out | 8 +- .../clientpositive/windowing_streaming.q.out | 14 +- 58 files changed, 2499 insertions(+), 1017 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index f089964..868cf04 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentMap; @@ -223,7 +224,12 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable int ret = 0; try { - List<Partition> partitions = getPartitionsList(); + Collection<Partition> partitions = null; + if (work.getPrunedPartitionList() == null) { + partitions = getPartitionsList(); + } else { + partitions = work.getPrunedPartitionList().getPartitions(); + } // non-partitioned table if (partitions == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 7f574dc..d0f8b6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -90,16 +90,25 @@ public class GenMRTableScan1 implements NodeProcessor { QBParseInfo parseInfo = parseCtx.getQB().getParseInfo(); if (parseInfo.isAnalyzeCommand()) { - boolean partialScan = parseInfo.isPartialScanAnalyzeCommand(); - boolean noScan = parseInfo.isNoScanAnalyzeCommand(); - if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) { - + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, all the following statements are the same + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // There will not be any MR or Tez job above this task StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec()); snjWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + // If partition is specified, get pruned partition list + Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + if (confirmedParts.size() > 0) { + Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); + List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, + partCols, false); + snjWork.setPrunedPartitionList(partList); + } Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf()); ctx.setCurrTask(snjTask); ctx.setCurrTopOp(null); http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index 9fcc1b2..72a843f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -95,16 +95,25 @@ public class ProcessAnalyzeTable implements NodeProcessor { assert alias != null; TezWork tezWork = context.currentTask.getWork(); - boolean partialScan = parseInfo.isPartialScanAnalyzeCommand(); - boolean noScan = parseInfo.isNoScanAnalyzeCommand(); - if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) { - + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, all the following statements are the same + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // There will not be any Tez job above this task StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + // If partition is specified, get pruned partition list + Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo); + if (confirmedParts.size() > 0) { + Table source = parseContext.getQB().getMetaData().getTableForAlias(alias); + List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo); + PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, + partCols, false); + snjWork.setPrunedPartitionList(partList); + } Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf()); snjTask.setParentTasks(null); context.rootTasks.remove(context.currentTask); http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java index 5487836..3e5a607 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; /** * Client-side stats aggregator task. @@ -31,6 +32,7 @@ public class StatsNoJobWork implements Serializable { private tableSpec tableSpecs; private boolean statsReliable; + private PrunedPartitionList prunedPartitionList; public StatsNoJobWork() { } @@ -54,4 +56,12 @@ public class StatsNoJobWork implements Serializable { public void setStatsReliable(boolean statsReliable) { this.statsReliable = statsReliable; } + + public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) { + this.prunedPartitionList = prunedPartitionList; + } + + public PrunedPartitionList getPrunedPartitionList() { + return prunedPartitionList; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/queries/clientpositive/orc_analyze.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_analyze.q b/ql/src/test/queries/clientpositive/orc_analyze.q index 3621c7a..bd22e6f 100644 --- a/ql/src/test/queries/clientpositive/orc_analyze.q +++ b/ql/src/test/queries/clientpositive/orc_analyze.q @@ -30,8 +30,13 @@ STORED AS orc; INSERT OVERWRITE TABLE orc_create_people SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people compute statistics; +desc formatted orc_create_people; + analyze table orc_create_people compute statistics partialscan; +desc formatted orc_create_people; +analyze table orc_create_people compute statistics noscan; desc formatted orc_create_people; drop table orc_create_people; @@ -70,8 +75,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state) SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); + analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="Or"); @@ -116,8 +128,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state) SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); + analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="Or"); @@ -174,8 +193,15 @@ ALTER TABLE orc_create_people SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSer ALTER TABLE orc_create_people SET FILEFORMAT ORC; set hive.stats.autogather = true; -analyze table orc_create_people partition(state) compute statistics noscan; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="OH"); +analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="OH"); + +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="OH"); http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/annotate_stats_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out index fb3c17b..d8089e3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -109,14 +109,12 @@ PREHOOK: query: -- partition level analyze statistics for specific parition analyze table loc_orc partition(year='2001') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc -PREHOOK: Input: default@loc_orc@year=2001 PREHOOK: Output: default@loc_orc PREHOOK: Output: default@loc_orc@year=2001 POSTHOOK: query: -- partition level analyze statistics for specific parition analyze table loc_orc partition(year='2001') compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc -POSTHOOK: Input: default@loc_orc@year=2001 POSTHOOK: Output: default@loc_orc POSTHOOK: Output: default@loc_orc@year=2001 PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE @@ -158,11 +156,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -181,19 +179,17 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for all partitions analyze table loc_orc partition(year) compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc -PREHOOK: Input: default@loc_orc@year=2001 -PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ PREHOOK: Output: default@loc_orc PREHOOK: Output: default@loc_orc@year=2001 PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ @@ -201,8 +197,6 @@ POSTHOOK: query: -- partition level analyze statistics for all partitions analyze table loc_orc partition(year) compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc -POSTHOOK: Input: default@loc_orc@year=2001 -POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Output: default@loc_orc POSTHOOK: Output: default@loc_orc@year=2001 POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ @@ -222,11 +216,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -245,11 +239,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -268,11 +262,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- both partitions will be pruned @@ -331,11 +325,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: zip (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL @@ -354,7 +348,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -377,7 +371,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: year (type: string) outputColumnNames: _col0 @@ -402,7 +396,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 @@ -425,7 +419,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 @@ -448,11 +442,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL @@ -471,11 +465,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column @@ -496,7 +490,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -532,7 +526,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -568,7 +562,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/annotate_stats_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index a74d85c..292ee33 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -122,11 +122,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- column level partial statistics @@ -155,11 +155,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- all selected columns have statistics @@ -180,7 +180,7 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 @@ -213,11 +213,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE @@ -236,7 +236,7 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string) outputColumnNames: _col0 @@ -259,7 +259,7 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptid (type: int) outputColumnNames: _col0 @@ -282,7 +282,7 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/limit_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/limit_pushdown.q.out b/ql/src/test/results/clientpositive/limit_pushdown.q.out index 4abef8c..4703478 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -360,38 +360,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double) outputColumnNames: cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -447,22 +447,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cdouble) keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reduce Operator Tree: Group By Operator @@ -470,17 +470,17 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -538,22 +538,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) outputColumnNames: ctinyint, cstring1, cstring2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reduce Operator Tree: Group By Operator @@ -561,17 +561,17 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat