This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 9658838a46b  HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
9658838a46b is described below

commit 9658838a46bcb0d07cc896ca17ad8dc7b2ba4b35
Author: SimhadriGovindappa <simhadri...@gmail.com>
AuthorDate: Mon Feb 13 11:25:46 2023 +0530

    HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  45 ++-
 .../positive/use_basic_stats_from_iceberg.q        |  39 ++
 .../positive/use_basic_stats_from_iceberg.q.out    | 412 +++++++++++++++++++++
 .../apache/hadoop/hive/ql/stats/BasicStats.java    |   8 +
 5 files changed, 490 insertions(+), 16 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b1b441dce7b..14d6837a3bd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2207,6 +2207,8 @@ public class HiveConf extends Configuration {
         "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
     HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
         "Whether to use codec pool in ORC. Disable if there are bugs with codec reuse."),
+    HIVE_USE_STATS_FROM("hive.use.stats.from","iceberg","Use stats from iceberg table snapshot for query " +
+        "planning. This has three values metastore, puffin and iceberg"),
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is not\n" +
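[Editor's note] For orientation, the new hive.use.stats.from property added above is read in HiveIcebergStorageHandler (next hunk) through HiveConf.getVar. The sketch below only restates that lookup; the constant and the three values come from the hunks in this mail, while the class and helper names are illustrative and not part of the patch.

  // Minimal sketch, not part of the commit: reading and interpreting the new knob.
  import org.apache.hadoop.hive.conf.HiveConf;

  class StatsSourceExample {
    static String resolveStatsSource(HiveConf conf) {
      // Defaults to "iceberg" per the HiveConf entry added above.
      String source = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
      switch (source) {
        case "iceberg":   // basic stats come from currentSnapshot().summary()
        case "puffin":    // placeholder branch in this patch
          return source;
        default:          // any other value falls back to metastore-backed stats
          return "metastore";
      }
    }
  }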
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index fc54f826e63..74c123f48d3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -130,6 +130,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   private static final String ICEBERG_URI_PREFIX = "iceberg://";
   private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
   private static final String TABLE_NAME_SEPARATOR = "..";
+  private static final String ICEBERG = "iceberg";
+  private static final String PUFFIN = "puffin";
+
   /**
    * Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -312,24 +315,34 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
     TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
     Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+    String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
-    if (table.currentSnapshot() != null) {
-      Map<String, String> summary = table.currentSnapshot().summary();
-      if (summary != null) {
-        if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
-          stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
-          stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
-          stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+    switch (statsSource) {
+      case ICEBERG:
+        if (table.currentSnapshot() != null) {
+          Map<String, String> summary = table.currentSnapshot().summary();
+          if (summary != null) {
+            if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
+              stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
+              stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
+              stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+            }
+          }
+        } else {
+          stats.put(StatsSetupConst.NUM_FILES, "0");
+          stats.put(StatsSetupConst.ROW_COUNT, "0");
+          stats.put(StatsSetupConst.TOTAL_SIZE, "0");
         }
-      }
-    } else {
-      stats.put(StatsSetupConst.NUM_FILES, "0");
-      stats.put(StatsSetupConst.ROW_COUNT, "0");
-      stats.put(StatsSetupConst.TOTAL_SIZE, "0");
+        break;
+      case PUFFIN:
+        // place holder for puffin
+        break;
+      default:
+        // fall back to metastore
     }
     return stats;
   }
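[Editor's note] The ICEBERG branch above consumes three totals that Iceberg keeps in the current snapshot's summary map. The sketch below is not part of the patch; it only shows how those same totals can be inspected from an already-loaded org.apache.iceberg.Table (for example one obtained via Catalogs.loadTable, as in the hunk above).

  // Illustrative sketch: peek at the snapshot summary fields the handler maps to
  // numFiles / numRows / totalSize. Not part of this commit.
  import java.util.Map;
  import org.apache.iceberg.SnapshotSummary;
  import org.apache.iceberg.Table;

  class SnapshotSummaryPeek {
    static void printBasicTotals(Table table) {
      if (table.currentSnapshot() == null) {
        // Matches the patch: with no snapshot, the handler reports 0 files / 0 rows / 0 bytes.
        System.out.println("no snapshot yet");
        return;
      }
      Map<String, String> summary = table.currentSnapshot().summary();
      System.out.println("data files: " + summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
      System.out.println("records:    " + summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
      System.out.println("file bytes: " + summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
    }
  }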
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
new file mode 100644
index 00000000000..90e2d95d1df
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -0,0 +1,39 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = metastore;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = iceberg;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+drop table if exists t1 ;
+drop table if exists t2 ;
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
new file mode 100644
index 00000000000..29f7fff01e8
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: drop table if exists t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment
+a                   	int
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	bucketing_version   	2
+	engine.hive.enabled 	true
+	format-version      	2
+	iceberg.orc.files.only	false
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0
+	numRows             	0
+	rawDataSize         	0
+	serialization.format	1
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG
+	totalSize           	0
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read
+	write.merge.mode    	merge-on-read
+	write.update.mode   	merge-on-read
+
+# Storage Information
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:         	No
+Sort Columns:       	[]
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment
+b                   	int
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+	bucketing_version   	2
+	engine.hive.enabled 	true
+	format-version      	2
+	iceberg.orc.files.only	false
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0
+	numRows             	0
+	rawDataSize         	0
+	serialization.format	1
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG
+	totalSize           	0
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read
+	write.merge.mode    	merge-on-read
+	write.update.mode   	merge-on-read
+
+# Storage Information
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:         	No
+Sort Columns:       	[]
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+          Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_28]
+            PartitionCols:_col0
+            Select Operator [SEL_27] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_26] (rows=1 width=4)
+                predicate:a is not null
+                TableScan [TS_0] (rows=1 width=4)
+                  default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+        <-Map 3 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_31]
+            PartitionCols:_col0
+            Select Operator [SEL_30] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_29] (rows=1 width=4)
+                predicate:b is not null
+                TableScan [TS_3] (rows=1 width=4)
+                  default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=1 width=192)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=1 width=192)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=1 width=192)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:NONE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+4	four	53
+5	five	54
+111	one	55
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=18 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=18 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=18 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
index 83e4f8e9da0..ba675dcd9d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
@@ -242,6 +242,7 @@ public class BasicStats {
 
   public BasicStats(Partish p) {
     partish = p;
+    checkForBasicStatsFromStorageHandler();
     rowCount = parseLong(StatsSetupConst.ROW_COUNT);
     rawDataSize = parseLong(StatsSetupConst.RAW_DATA_SIZE);
     totalSize = parseLong(StatsSetupConst.TOTAL_SIZE);
@@ -281,6 +282,13 @@ public class BasicStats {
 
   }
 
+  private void checkForBasicStatsFromStorageHandler() {
+    if (partish.getTable() != null && partish.getTable().isNonNative() &&
+        partish.getTable().getStorageHandler().canProvideBasicStatistics()) {
+      partish.getPartParameters().putAll(partish.getTable().getStorageHandler().getBasicStatistics(partish));
+    }
+  }
+
   public long getNumRows() {
     return currentNumRows;
   }
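[Editor's note] The BasicStats hook above consults two storage-handler methods, canProvideBasicStatistics() and getBasicStatistics(Partish); both names come from the hunks in this mail. The class below is purely illustrative (it is not part of the patch and not a real Hive class); it only sketches the shape of that contract, with made-up example values.

  // Illustrative sketch of the contract BasicStats relies on: advertise that basic
  // stats are available, then return them keyed by the StatsSetupConst names that
  // BasicStats parses (ROW_COUNT, TOTAL_SIZE, ...). Values here are placeholders.
  public class ExampleStatsProvidingStorageHandler /* would extend a HiveStorageHandler base */ {

    // Mirrors canProvideBasicStatistics() as checked in checkForBasicStatsFromStorageHandler.
    public boolean canProvideBasicStatistics() {
      return true;
    }

    // Mirrors getBasicStatistics(Partish); the real signature takes a Partish, simplified
    // here to keep the sketch self-contained. The returned map is merged into the
    // table/partition parameters before BasicStats reads them.
    public java.util.Map<String, String> getBasicStatistics(Object partish) {
      java.util.Map<String, String> stats = new java.util.HashMap<>();
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES, "1");
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.ROW_COUNT, "9");
      stats.put(org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE, "1024");
      return stats;
    }
  }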