This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d486f587df0 HIVE-28702: Iceberg: Statistics are inconsistent on time
travel queries (Shohei Okumiya, reviewed by Denys Kuzmenko)
d486f587df0 is described below
commit d486f587df0954f43f0ef6e7dabe7aafe25b1cc4
Author: Shohei Okumiya <[email protected]>
AuthorDate: Tue Jan 14 00:30:06 2025 +0900
HIVE-28702: Iceberg: Statistics are inconsistent on time travel queries
(Shohei Okumiya, reviewed by Denys Kuzmenko)
Closes #5607
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 16 +-
.../positive/puffin_col_stats_with_time_travel.q | 25 +++
.../llap/puffin_col_stats_with_time_travel.q.out | 224 +++++++++++++++++++++
.../results/positive/write_iceberg_branch.q.out | 50 ++---
.../test/resources/testconfiguration.properties | 4 +-
5 files changed, 290 insertions(+), 29 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 9058f62963d..9ecd964c0c1 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -585,7 +585,11 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
@Override
public boolean
canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
- return canSetColStatistics(hmsTable) && canProvideColStats(table,
table.currentSnapshot().snapshotId());
+ Snapshot snapshot = IcebergTableUtil.getTableSnapshot(hmsTable, table);
+ if (snapshot == null) {
+ return false;
+ }
+ return canSetColStatistics(hmsTable) && canProvideColStats(table,
snapshot.snapshotId());
}
private boolean canProvideColStats(Table table, long snapshotId) {
@@ -595,8 +599,14 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
@Override
public List<ColumnStatisticsObj>
getColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
- return IcebergTableUtil.getColStatsPath(table).map(statsPath ->
readColStats(table, statsPath))
- .orElse(new ColumnStatistics()).getStatsObj();
+ Snapshot snapshot = IcebergTableUtil.getTableSnapshot(hmsTable, table);
+ ColumnStatistics emptyStats = new ColumnStatistics();
+ if (snapshot == null) {
+ return emptyStats.getStatsObj();
+ }
+ long snapshotId = IcebergTableUtil.getTableSnapshot(hmsTable,
table).snapshotId();
+ return IcebergTableUtil.getColStatsPath(table, snapshotId).map(statsPath
-> readColStats(table, statsPath))
+ .orElse(emptyStats).getStatsObj();
}
private ColumnStatistics readColStats(Table table, Path statsPath) {
diff --git
a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
new file mode 100644
index 00000000000..243b358eb53
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q
@@ -0,0 +1,25 @@
+set hive.fetch.task.conversion=none;
+
+create external table default.tbl_ice_puffin_time_travel(a int, b string, c
int) stored by iceberg;
+insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2,
'two', 51);
+alter table default.tbl_ice_puffin_time_travel create tag checkpoint;
+
+explain select * from default.tbl_ice_puffin_time_travel;
+explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;
+
+insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null);
+
+explain select * from default.tbl_ice_puffin_time_travel;
+explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
new file mode 100644
index 00000000000..8bdfc189462
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out
@@ -0,0 +1,224 @@
+PREHOOK: query: create external table default.tbl_ice_puffin_time_travel(a
int, b string, c int) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: create external table default.tbl_ice_puffin_time_travel(a
int, b string, c int) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1,
'one', 50), (2, 'two', 51)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1,
'one', 50), (2, 'two', 51)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag
checkpoint
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag
checkpoint
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_puffin_time_travel
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), b (type: string), c (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from
default.tbl_ice_puffin_time_travel.tag_checkpoint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from
default.tbl_ice_puffin_time_travel.tag_checkpoint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_puffin_time_travel
+ Snapshot ref: tag_checkpoint
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), b (type: string), c (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin_time_travel
+POSTHOOK: query: insert into tbl_ice_puffin_time_travel values
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null),
+(null, null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin_time_travel
+PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_puffin_time_travel
+ Statistics: Num rows: 14 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), b (type: string), c (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 14 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from
default.tbl_ice_puffin_time_travel.tag_checkpoint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from
default.tbl_ice_puffin_time_travel.tag_checkpoint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin_time_travel
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_puffin_time_travel
+ Snapshot ref: tag_checkpoint
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), b (type: string), c (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
b/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
index cbf1e0562d1..ea0f7a5ed52 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/write_iceberg_branch.q.out
@@ -237,14 +237,14 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: (a = 22) (type: boolean)
- Statistics: Num rows: 3 Data size: 291 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 97 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: PARTITION__SPEC__ID (type: int),
PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type:
bigint), PARTITION__PROJECTION (type: string), 22 (type: int), b (type:
string), c (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
- Statistics: Num rows: 3 Data size: 1455 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 485 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 1455 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 485 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -319,18 +319,18 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 388 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: (c = 66) (type: boolean)
- Statistics: Num rows: 2 Data size: 194 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 97 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: PARTITION__SPEC__ID (type: int),
PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type:
bigint), PARTITION__PROJECTION (type: string), a (type: int), b (type: string),
b (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col9
- Statistics: Num rows: 2 Data size: 1140 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 570 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint),
_col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type:
int), _col6 (type: string), 66 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
- Statistics: Num rows: 2 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 485 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 485 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -339,10 +339,10 @@ STAGE PLANS:
Select Operator
expressions: 33 (type: int), _col9 (type: string), 66
(type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 194 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 97 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 194 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 97 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -476,11 +476,11 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col5 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 4 Data size: 2324 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 3385 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col0 (type: int), _col5
(type: string), _col7 (type: string), _col2 (type: int), _col6 (type: bigint),
_col4 (type: bigint), _col3 (type: int), _col10 (type: int), _col9 (type:
string), _col8 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 4 Data size: 2324 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 3385 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((_col10 = _col1) and (_col10 > 100)) (type:
boolean)
Statistics: Num rows: 1 Data size: 581 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -498,14 +498,14 @@ STAGE PLANS:
name: default.ice01
Filter Operator
predicate: ((_col10 = _col1) and (_col10 <= 100)) (type:
boolean)
- Statistics: Num rows: 2 Data size: 1162 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1935 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col7 (type: int), _col6 (type: bigint),
_col2 (type: string), _col5 (type: bigint), _col3 (type: string), _col10 (type:
int), _col9 (type: string), _col8 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
- Statistics: Num rows: 2 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1455 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 970 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1455 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -513,14 +513,14 @@ STAGE PLANS:
name: default.ice01
Filter Operator
predicate: ((_col10 = _col1) and (_col10 <= 100)) (type:
boolean)
- Statistics: Num rows: 2 Data size: 1162 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1935 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col10 (type: int), 'Merged' (type:
string), (_col8 + 10) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 196 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 482 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 196 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 482 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -528,14 +528,14 @@ STAGE PLANS:
name: default.ice01
Filter Operator
predicate: _col10 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 581 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 2031 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int), _col0 (type: string),
_col4 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 96 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 576 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 96 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 6 Data size: 576 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
@@ -543,24 +543,24 @@ STAGE PLANS:
name: default.ice01
Filter Operator
predicate: (_col10 = _col1) (type: boolean)
- Statistics: Num rows: 2 Data size: 1162 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1935 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col5 (type: bigint),
_col6 (type: bigint), _col7 (type: int)
outputColumnNames: _col2, _col5, _col6, _col7
- Statistics: Num rows: 2 Data size: 1162 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 1935 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col7 (type: int), _col6 (type: bigint), _col2
(type: string), _col5 (type: bigint)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 2 Data size: 424 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 644 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type:
bigint), _col2 (type: string), _col3 (type: bigint)
null sort order: zzzz
sort order: ++++
Map-reduce partition columns: _col0 (type: int),
_col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
- Statistics: Num rows: 2 Data size: 424 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 644 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col4 (type: bigint)
Reducer 3
Execution mode: vectorized
@@ -570,7 +570,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: bigint),
KEY._col2 (type: string), KEY._col3 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 2 Data size: 424 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 644 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
predicate: (_col4 > 1L) (type: boolean)
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE
Column stats: COMPLETE
diff --git a/itests/src/test/resources/testconfiguration.properties
b/itests/src/test/resources/testconfiguration.properties
index 21e639499fa..d39f90c34e6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -423,6 +423,7 @@ iceberg.llap.query.files=\
iceberg_merge_files.q,\
llap_iceberg_read_orc.q,\
llap_iceberg_read_parquet.q,\
+ puffin_col_stats_with_time_travel.q,\
vectorized_iceberg_read_mixed.q,\
vectorized_iceberg_read_multitable.q,\
vectorized_iceberg_read_orc.q,\
@@ -457,7 +458,8 @@ iceberg.llap.only.query.files=\
iceberg_merge_delete_files.q,\
iceberg_merge_files.q,\
llap_iceberg_read_orc.q,\
- llap_iceberg_read_parquet.q
+ llap_iceberg_read_parquet.q,\
+ puffin_col_stats_with_time_travel.q
compaction.query.files=\
compaction_query_based.q,\