This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 9658838a46b  HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
9658838a46b is described below

commit 9658838a46bcb0d07cc896ca17ad8dc7b2ba4b35
Author: SimhadriGovindappa <simhadri...@gmail.com>
AuthorDate: Mon Feb 13 11:25:46 2023 +0530

    HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  45 ++-
 .../positive/use_basic_stats_from_iceberg.q        |  39 ++
 .../positive/use_basic_stats_from_iceberg.q.out    | 412 +++++++++++++++++++++
 .../apache/hadoop/hive/ql/stats/BasicStats.java    |   8 +
 5 files changed, 490 insertions(+), 16 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b1b441dce7b..14d6837a3bd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2207,6 +2207,8 @@ public class HiveConf extends Configuration {
         "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
     HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
         "Whether to use codec pool in ORC. Disable if there are bugs with codec reuse."),
+    HIVE_USE_STATS_FROM("hive.use.stats.from","iceberg","Use stats from iceberg table snapshot for query " +
+        "planning. This has three values metastore, puffin and iceberg"),
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is not\n" +
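[Editor's note] For orientation, the new hive.use.stats.from property added above is read in HiveIcebergStorageHandler (next hunk) through HiveConf.getVar. The sketch below only restates that lookup; the constant and the three values come from the hunks in this mail, while the class and helper names are illustrative and not part of the patch.

  // Minimal sketch, not part of the commit: reading and interpreting the new knob.
  import org.apache.hadoop.hive.conf.HiveConf;

  class StatsSourceExample {
    static String resolveStatsSource(HiveConf conf) {
      // Defaults to "iceberg" per the HiveConf entry added above.
      String source = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
      switch (source) {
        case "iceberg":   // basic stats come from currentSnapshot().summary()
        case "puffin":    // placeholder branch in this patch
          return source;
        default:          // any other value falls back to metastore-backed stats
          return "metastore";
      }
    }
  }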
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index fc54f826e63..74c123f48d3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -130,6 +130,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   private static final String ICEBERG_URI_PREFIX = "iceberg://";
   private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
   private static final String TABLE_NAME_SEPARATOR = "..";
+  private static final String ICEBERG = "iceberg";
+  private static final String PUFFIN = "puffin";
+
   /**
    * Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -312,24 +315,34 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
     TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
     Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+    String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
-    if (table.currentSnapshot() != null) {
-      Map<String, String> summary = table.currentSnapshot().summary();
-      if (summary != null) {
-        if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
-          stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
-          stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
-          stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+    switch (statsSource) {
+      case ICEBERG:
+        if (table.currentSnapshot() != null) {
+          Map<String, String> summary = table.currentSnapshot().summary();
+          if (summary != null) {
+            if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
+              stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
+              stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
+              stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+            }
+          }
+        } else {
+          stats.put(StatsSetupConst.NUM_FILES, "0");
+          stats.put(StatsSetupConst.ROW_COUNT, "0");
+          stats.put(StatsSetupConst.TOTAL_SIZE, "0");
         }
-      }
-    } else {
-      stats.put(StatsSetupConst.NUM_FILES, "0");
-      stats.put(StatsSetupConst.ROW_COUNT, "0");
-      stats.put(StatsSetupConst.TOTAL_SIZE, "0");
+        break;
+      case PUFFIN:
+        // place holder for puffin
+        break;
+      default:
+        // fall back to metastore
     }
     return stats;
   }
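[Editor's note] The ICEBERG branch above consumes three totals that Iceberg keeps in the current snapshot's summary map. The sketch below is not part of the patch; it only shows how those same totals can be inspected from an already-loaded org.apache.iceberg.Table (for example one obtained via Catalogs.loadTable, as in the hunk above).

  // Illustrative sketch: peek at the snapshot summary fields the handler maps to
  // numFiles / numRows / totalSize. Not part of this commit.
  import java.util.Map;
  import org.apache.iceberg.SnapshotSummary;
  import org.apache.iceberg.Table;

  class SnapshotSummaryPeek {
    static void printBasicTotals(Table table) {
      if (table.currentSnapshot() == null) {
        // Matches the patch: with no snapshot, the handler reports 0 files / 0 rows / 0 bytes.
        System.out.println("no snapshot yet");
        return;
      }
      Map<String, String> summary = table.currentSnapshot().summary();
      System.out.println("data files: " + summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
      System.out.println("records:    " + summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
      System.out.println("file bytes: " + summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
    }
  }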
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
new file mode 100644
index 00000000000..90e2d95d1df
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -0,0 +1,39 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = metastore;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = iceberg;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+drop table if exists t1 ;
+drop table if exists t2 ;
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
new file mode 100644
index 00000000000..29f7fff01e8
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: drop table if exists t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment
+a                   	int
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	bucketing_version   	2
+	engine.hive.enabled 	true
+	format-version      	2
+	iceberg.orc.files.only	false
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0
+	numRows             	0
+	rawDataSize         	0
+	serialization.format	1
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG
+	totalSize           	0
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read
+	write.merge.mode    	merge-on-read
+	write.update.mode   	merge-on-read
+
+# Storage Information
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:         	No
+Sort Columns:       	[]
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment
+b                   	int
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+	bucketing_version   	2
+	engine.hive.enabled 	true
+	format-version      	2
+	iceberg.orc.files.only	false
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0
+	numRows             	0
+	rawDataSize         	0
+	serialization.format	1
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG
+	totalSize           	0
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read
+	write.merge.mode    	merge-on-read
+	write.update.mode   	merge-on-read
+
+# Storage Information
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:         	No
+Sort Columns:       	[]
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+          Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_28]
+            PartitionCols:_col0
+            Select Operator [SEL_27] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_26] (rows=1 width=4)
+                predicate:a is not null
+                TableScan [TS_0] (rows=1 width=4)
+                  default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+        <-Map 3 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_31]
+            PartitionCols:_col0
+            Select Operator [SEL_30] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_29] (rows=1 width=4)
+                predicate:b is not null
+                TableScan [TS_3] (rows=1 width=4)
+                  default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=1 width=192)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=1 width=192)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=1 width=192)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:NONE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+4	four	53
+5	five	54
+111	one	55
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=18 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=18 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=18 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
index 83e4f8e9da0..ba675dcd9d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
@@ -242,6 +242,7 @@ public class BasicStats {
 
   public BasicStats(Partish p) {
     partish = p;
+    checkForBasicStatsFromStorageHandler();
     rowCount = parseLong(StatsSetupConst.ROW_COUNT);
     rawDataSize = parseLong(StatsSetupConst.RAW_DATA_SIZE);
     totalSize = parseLong(StatsSetupConst.TOTAL_SIZE);
@@ -281,6 +282,13 @@ public class BasicStats {
 
   }
 
+  private void checkForBasicStatsFromStorageHandler() {
+    if (partish.getTable() != null && partish.getTable().isNonNative() &&
+        partish.getTable().getStorageHandler().canProvideBasicStatistics()) {
+      partish.getPartParameters().putAll(partish.getTable().getStorageHandler().getBasicStatistics(partish));
+    }
+  }
+
   public long getNumRows() {
     return currentNumRows;
   }
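[Editor's note] The BasicStats hook above consults two storage-handler methods, canProvideBasicStatistics() and getBasicStatistics(Partish); both names come from the hunks in this mail. The class below is purely illustrative (it is not part of the patch and not a real Hive class); it only sketches the shape of that contract, with made-up example values.

  // Illustrative sketch of the contract BasicStats relies on: advertise that basic
  // stats are available, then return them keyed by the StatsSetupConst names that
  // BasicStats parses (ROW_COUNT, TOTAL_SIZE, ...). Values here are placeholders.
  public class ExampleStatsProvidingStorageHandler /* would extend a HiveStorageHandler base */ {

    // Mirrors canProvideBasicStatistics() as checked in checkForBasicStatsFromStorageHandler.
    public boolean canProvideBasicStatistics() {
      return true;
    }

    // Mirrors getBasicStatistics(Partish); the real signature takes a Partish, simplified
    // here to keep the sketch self-contained. The returned map is merged into the
    // table/partition parameters before BasicStats reads them.
    public java.util.Map<String, String> getBasicStatistics(Object partish) {
      java.util.Map<String, String> stats = new java.util.HashMap<>();
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES, "1");
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.ROW_COUNT, "9");
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE, "1024");
      return stats;
    }
  }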