Repository: hive Updated Branches: refs/heads/branch-1.0 2414c350f -> f1939cb5a
backport HIVE-11301: thrift metastore issue when getting stats results in disconnect (Pengcheng Xiong, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1939cb5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1939cb5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1939cb5 Branch: refs/heads/branch-1.0 Commit: f1939cb5ad3e0a10736d986f3ed47b5577da2ef9 Parents: 2414c35 Author: Pengcheng Xiong <pxi...@apache.org> Authored: Mon Oct 5 18:20:38 2015 -0700 Committer: Pengcheng Xiong <pxi...@apache.org> Committed: Mon Oct 5 18:20:38 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 10 +- .../test/queries/clientpositive/stats_ppr_all.q | 24 ++ .../results/clientpositive/stats_ppr_all.q.out | 300 +++++++++++++++++++ 3 files changed, 332 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index b0bd8ce..26cf56d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -243,8 +243,14 @@ public class StatsUtils { } Map<String, String> colToTabAlias = new HashMap<String, String>(); neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias); - AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), - neededColumns, partNames); + AggrStats aggrStats = null; + // We check the sizes of neededColumns and partNames here. If either + // size is 0, aggrStats is null after several retries. Thus, we can + // skip the step to connect to the metastore. + if (neededColumns.size() > 0 && partNames.size() > 0) { + aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), + neededColumns, partNames); + } if (null == aggrStats || null == aggrStats.getColStats() || aggrStats.getColStatsSize() == 0) { // There are some partitions with no state (or we didn't fetch any state). http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/queries/clientpositive/stats_ppr_all.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/stats_ppr_all.q b/ql/src/test/queries/clientpositive/stats_ppr_all.q new file mode 100644 index 0000000..a5630cb --- /dev/null +++ b/ql/src/test/queries/clientpositive/stats_ppr_all.q @@ -0,0 +1,24 @@ +set hive.stats.fetch.column.stats=true; + +drop table ss; + +CREATE TABLE ss ( + sales_order_id BIGINT, + order_amount FLOAT) +PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc; + +insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1; +insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1; +insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1; + +ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns; + +explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1); + +explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0; + +explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0; + +explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0; + +explain select '1' from ss where (year*100+month*10+day) > "201511"; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/results/clientpositive/stats_ppr_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out new file mode 100644 index 0000000..d00c91e --- /dev/null +++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out @@ -0,0 +1,300 @@ +PREHOOK: query: drop table ss +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE ss ( + sales_order_id BIGINT, + order_amount FLOAT) +PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss +POSTHOOK: query: CREATE TABLE ss ( + sales_order_id BIGINT, + order_amount FLOAT) +PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss +PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=1).order_amount EXPRESSION [] +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=1).sales_order_id EXPRESSION [] +PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 +POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=2,day=1).order_amount EXPRESSION [] +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=2,day=1).sales_order_id EXPRESSION [] +PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=2).order_amount EXPRESSION [] +POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=2).sales_order_id EXPRESSION [] +PREHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 +PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 +PREHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 +POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 +POSTHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +#### A masked pattern was here #### +PREHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1) +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: order_amount (type: float) + outputColumnNames: order_amount + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(order_amount) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Filter Operator + predicate: ((((year * 10000) + (month * 100)) + day) = '2015010') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Select Operator + expressions: order_amount (type: float) + outputColumnNames: order_amount + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Group By Operator + aggregations: sum(order_amount) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: order_amount (type: float) + outputColumnNames: order_amount + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(order_amount) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((201500 + (month * 10)) + day) > '201511') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: order_amount (type: float) + outputColumnNames: order_amount + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator + aggregations: sum(order_amount) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select '1' from ss where (year*100+month*10+day) > "201511" +PREHOOK: type: QUERY +POSTHOOK: query: explain select '1' from ss where (year*100+month*10+day) > "201511" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: ss + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((201500 + (month * 10)) + day) > '201511') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator + expressions: '1' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + ListSink +