This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d486f587df0 HIVE-28702: Iceberg: Statistics are inconsistent on time travel queries (Shohei Okumiya, reviewed by Denys Kuzmenko)
d486f587df0 is described below

commit d486f587df0954f43f0ef6e7dabe7aafe25b1cc4
Author: Shohei Okumiya <[email protected]>
AuthorDate: Tue Jan 14 00:30:06 2025 +0900

    HIVE-28702: Iceberg: Statistics are inconsistent on time travel queries (Shohei Okumiya, reviewed by Denys Kuzmenko)
    
    Closes #5607
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  16 +-
 .../positive/puffin_col_stats_with_time_travel.q   |  25 +++
 .../llap/puffin_col_stats_with_time_travel.q.out   | 224 +++++++++++++++++++++
 .../results/positive/write_iceberg_branch.q.out    |  50 ++---
 .../test/resources/testconfiguration.properties    |   4 +-
 5 files changed, 290 insertions(+), 29 deletions(-)

diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 9058f62963d..9ecd964c0c1 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -585,7 +585,11 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
   @Override
   public boolean 
canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
     Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
-    return canSetColStatistics(hmsTable) && canProvideColStats(table, 
table.currentSnapshot().snapshotId());
+    Snapshot snapshot = IcebergTableUtil.getTableSnapshot(hmsTable, table);
+    if (snapshot == null) {
+      return false;
+    }
+    return canSetColStatistics(hmsTable) && canProvideColStats(table, 
snapshot.snapshotId());
   }
 
   private boolean canProvideColStats(Table table, long snapshotId) {
@@ -595,8 +599,14 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
   @Override
   public List<ColumnStatisticsObj> 
getColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
     Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
-    return IcebergTableUtil.getColStatsPath(table).map(statsPath -> 
readColStats(table, statsPath))
-      .orElse(new ColumnStatistics()).getStatsObj();
+    Snapshot snapshot = IcebergTableUtil.getTableSnapshot(hmsTable, table);
+    ColumnStatistics emptyStats = new ColumnStatistics();
+    if (snapshot == null) {
+      return emptyStats.getStatsObj();
+    }
+    long snapshotId = IcebergTableUtil.getTableSnapshot(hmsTable, 
table).snapshotId();
+    return IcebergTableUtil.getColStatsPath(table, snapshotId).map(statsPath 
-> readColStats(table, statsPath))
+      .orElse(emptyStats).getStatsObj();
   }
 
   private ColumnStatistics readColStats(Table table, Path statsPath) {
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
 
b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
new file mode 100644
index 00000000000..243b358eb53
--- /dev/null
+++ 
b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
@@ -0,0 +1,25 @@
+set hive.fetch.task.conversion=none;
+
+create external table default.tbl_ice_puffin_time_travel(a int, b string, c 
int) stored by iceberg;
+insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2, 
'two', 51);
+alter table default.tbl_ice_puffin_time_travel create tag checkpoint;
+
+explain select * from default.tbl_ice_puffin_time_travel;
+explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;
+
+insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null);
+
+explain select * from default.tbl_ice_puffin_time_travel;
+explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
new file mode 100644
index 00000000000..8bdfc189462
--- /dev/null
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
@@ -0,0 +1,224 @@
+PREHOOK: query: create external table default.tbl_ice_puffin_time_travel(a 
int, b string, c int) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: create external table default.tbl_ice_puffin_time_travel(a 
int, b string, c int) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1, 
'one', 50), (2, 'two', 51)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1, 
'one', 50), (2, 'two', 51)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag 
checkpoint
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag 
checkpoint
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_puffin_time_travel
+                  Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: string), c (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from 
default.tbl_ice_puffin_time_travel.tag_checkpoint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from 
default.tbl_ice_puffin_time_travel.tag_checkpoint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_puffin_time_travel
+                  Snapshot ref: tag_checkpoint
+                  Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: string), c (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_puffin_time_travel
+                  Statistics: Num rows: 14 Data size: 285 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: string), c (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 14 Data size: 285 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 14 Data size: 285 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from 
default.tbl_ice_puffin_time_travel.tag_checkpoint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from 
default.tbl_ice_puffin_time_travel.tag_checkpoint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_puffin_time_travel
+                  Snapshot ref: tag_checkpoint
+                  Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: string), c (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out 
b/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
index cbf1e0562d1..ea0f7a5ed52 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
@@ -237,14 +237,14 @@ STAGE PLANS:
                   Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE 
Column stats: COMPLETE
                   Filter Operator
                     predicate: (a = 22) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 291 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 97 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: PARTITION__SPEC__ID (type: int), 
PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: 
bigint), PARTITION__PROJECTION (type: string), 22 (type: int), b (type: 
string), c (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
-                      Statistics: Num rows: 3 Data size: 1455 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 485 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 3 Data size: 1455 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 485 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                             output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -319,18 +319,18 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE 
Column stats: COMPLETE
                   Filter Operator
                     predicate: (c = 66) (type: boolean)
-                    Statistics: Num rows: 2 Data size: 194 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 97 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: PARTITION__SPEC__ID (type: int), 
PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: 
bigint), PARTITION__PROJECTION (type: string), a (type: int), b (type: string), 
b (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col9
-                      Statistics: Num rows: 2 Data size: 1140 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 570 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: _col0 (type: int), _col1 (type: bigint), 
_col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), 66 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
-                        Statistics: Num rows: 2 Data size: 970 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 485 Basic stats: 
COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 2 Data size: 970 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1 Data size: 485 Basic stats: 
COMPLETE Column stats: COMPLETE
                           table:
                               input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                               output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -339,10 +339,10 @@ STAGE PLANS:
                       Select Operator
                         expressions: 33 (type: int), _col9 (type: string), 66 
(type: int)
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 2 Data size: 194 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 97 Basic stats: 
COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 2 Data size: 194 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1 Data size: 97 Basic stats: 
COMPLETE Column stats: COMPLETE
                           table:
                               input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                               output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -476,11 +476,11 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 4 Data size: 2324 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 10 Data size: 3385 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: int), _col5 
(type: string), _col7 (type: string), _col2 (type: int), _col6 (type: bigint), 
_col4 (type: bigint), _col3 (type: int), _col10 (type: int), _col9 (type: 
string), _col8 (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-                  Statistics: Num rows: 4 Data size: 2324 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 10 Data size: 3385 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((_col10 = _col1) and (_col10 > 100)) (type: 
boolean)
                     Statistics: Num rows: 1 Data size: 581 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -498,14 +498,14 @@ STAGE PLANS:
                             name: default.ice01
                   Filter Operator
                     predicate: ((_col10 = _col1) and (_col10 <= 100)) (type: 
boolean)
-                    Statistics: Num rows: 2 Data size: 1162 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5 Data size: 1935 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: _col7 (type: int), _col6 (type: bigint), 
_col2 (type: string), _col5 (type: bigint), _col3 (type: string), _col10 (type: 
int), _col9 (type: string), _col8 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
-                      Statistics: Num rows: 2 Data size: 970 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5 Data size: 1455 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 2 Data size: 970 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 5 Data size: 1455 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                             output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -513,14 +513,14 @@ STAGE PLANS:
                             name: default.ice01
                   Filter Operator
                     predicate: ((_col10 = _col1) and (_col10 <= 100)) (type: 
boolean)
-                    Statistics: Num rows: 2 Data size: 1162 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5 Data size: 1935 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: _col10 (type: int), 'Merged' (type: 
string), (_col8 + 10) (type: int)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 2 Data size: 196 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5 Data size: 482 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 2 Data size: 196 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 5 Data size: 482 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                             output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -528,14 +528,14 @@ STAGE PLANS:
                             name: default.ice01
                   Filter Operator
                     predicate: _col10 is null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 581 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 6 Data size: 2031 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: _col1 (type: int), _col0 (type: string), 
_col4 (type: int)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 6 Data size: 576 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 6 Data size: 576 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                             output format: 
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -543,24 +543,24 @@ STAGE PLANS:
                             name: default.ice01
                   Filter Operator
                     predicate: (_col10 = _col1) (type: boolean)
-                    Statistics: Num rows: 2 Data size: 1162 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5 Data size: 1935 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: _col2 (type: string), _col5 (type: bigint), 
_col6 (type: bigint), _col7 (type: int)
                       outputColumnNames: _col2, _col5, _col6, _col7
-                      Statistics: Num rows: 2 Data size: 1162 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5 Data size: 1935 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         keys: _col7 (type: int), _col6 (type: bigint), _col2 
(type: string), _col5 (type: bigint)
                         minReductionHashAggr: 0.4
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Statistics: Num rows: 2 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 4 Data size: 644 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: int), _col1 (type: 
bigint), _col2 (type: string), _col3 (type: bigint)
                           null sort order: zzzz
                           sort order: ++++
                           Map-reduce partition columns: _col0 (type: int), 
_col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
-                          Statistics: Num rows: 2 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 4 Data size: 644 Basic stats: 
COMPLETE Column stats: COMPLETE
                           value expressions: _col4 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
@@ -570,7 +570,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: bigint), 
KEY._col2 (type: string), KEY._col3 (type: bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 2 Data size: 424 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 4 Data size: 644 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Filter Operator
                   predicate: (_col4 > 1L) (type: boolean)
                   Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE 
Column stats: COMPLETE
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 21e639499fa..d39f90c34e6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -423,6 +423,7 @@ iceberg.llap.query.files=\
   iceberg_merge_files.q,\
   llap_iceberg_read_orc.q,\
   llap_iceberg_read_parquet.q,\
+  puffin_col_stats_with_time_travel.q,\
   vectorized_iceberg_read_mixed.q,\
   vectorized_iceberg_read_multitable.q,\
   vectorized_iceberg_read_orc.q,\
@@ -457,7 +458,8 @@ iceberg.llap.only.query.files=\
   iceberg_merge_delete_files.q,\
   iceberg_merge_files.q,\
   llap_iceberg_read_orc.q,\
-  llap_iceberg_read_parquet.q
+  llap_iceberg_read_parquet.q,\
+  puffin_col_stats_with_time_travel.q
 
 compaction.query.files=\
   compaction_query_based.q,\

Reply via email to