This is an automated email from the ASF dual-hosted git repository.
okumin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 28abf17424e HIVE-28765: Iceberg: Incorrect partition statistics on
time travel + partition evolution (#5748)
28abf17424e is described below
commit 28abf17424e97112d75947f1d7a2f043b3961fa4
Author: Shohei Okumiya <[email protected]>
AuthorDate: Wed Apr 23 20:23:23 2025 +0900
HIVE-28765: Iceberg: Incorrect partition statistics on time travel +
partition evolution (#5748)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 10 +-
.../iceberg_major_compaction_partition_evolution.q | 26 +
...berg_major_compaction_partition_evolution.q.out | 557 +++++++++++++++++++++
3 files changed, 590 insertions(+), 3 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 013bf168cf3..82b9d9084e8 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -2161,12 +2161,16 @@ public List<Partition> getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm
}
public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
- if ((hmsTable.getAsOfVersion() != null || hmsTable.getAsOfTimestamp() != null) &&
- hasUndergonePartitionEvolution(hmsTable) ||
- !hmsTable.getTTable().isSetId()) {
+ if (!hmsTable.getTTable().isSetId()) {
return false;
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, hmsTable);
+
+ boolean readsNonCurrentSnapshot = snapshot != null && !snapshot.equals(table.currentSnapshot());
+ if (readsNonCurrentSnapshot && hasUndergonePartitionEvolution(table)) {
+ return false;
+ }
return table.spec().isPartitioned();
}
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution.q
index 454ea83732f..a38a856f643 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution.q
@@ -38,11 +38,15 @@ insert into ice_orc VALUES ('fn1','ln1', 1, 10, 100);
insert into ice_orc VALUES ('fn2','ln2', 1, 10, 100);
insert into ice_orc VALUES ('fn3','ln3', 1, 11, 100);
insert into ice_orc VALUES (null,null, null, null, null);
+alter table ice_orc create tag v1;
+
alter table ice_orc set partition spec(company_id, dept_id);
insert into ice_orc VALUES ('fn4','ln4', 1, 11, 100);
insert into ice_orc VALUES ('fn5','ln5', 2, 20, 100);
insert into ice_orc VALUES ('fn6','ln6', 2, 20, 100);
insert into ice_orc VALUES (null,null, null, null, null);
+alter table ice_orc create tag v2;
+
alter table ice_orc set partition spec(company_id, dept_id, team_id);
insert into ice_orc VALUES ('fn7','ln7', 2, 21, 100);
insert into ice_orc VALUES ('fn8','ln8', 2, 21, 100);
@@ -52,6 +56,8 @@ update ice_orc set last_name = 'ln1a' where first_name='fn1';
update ice_orc set last_name = 'ln2a' where first_name='fn2';
update ice_orc set last_name = 'ln3a' where first_name='fn3';
update ice_orc set last_name = 'ln4a' where first_name='fn4';
+alter table ice_orc create tag v3;
+
alter table ice_orc set partition spec(company_id, dept_id);
update ice_orc set last_name = 'ln5a' where first_name='fn5';
update ice_orc set last_name = 'ln6a' where first_name='fn6';
@@ -59,6 +65,7 @@ update ice_orc set last_name = 'ln7a' where first_name='fn7';
update ice_orc set last_name = 'ln8a' where first_name='fn8';
delete from ice_orc where last_name in ('ln1a', 'ln8a');
+alter table ice_orc create tag v4;
select * from ice_orc;
describe formatted ice_orc;
@@ -67,6 +74,21 @@ select `partition`, spec_id, content, record_count
from default.ice_orc.files
order by `partition`, spec_id, content, record_count;
+-- Disable fetch tasks to see statistics
+set hive.fetch.task.conversion=none;
+explain select * from default.ice_orc.tag_v1;
+explain select * from default.ice_orc.tag_v2;
+explain select * from default.ice_orc.tag_v3;
+explain select * from default.ice_orc.tag_v4;
+explain select * from ice_orc;
+
+explain select * from default.ice_orc.tag_v1 where company_id is not null;
+explain select * from default.ice_orc.tag_v2 where company_id is not null;
+explain select * from default.ice_orc.tag_v3 where company_id is not null;
+explain select * from default.ice_orc.tag_v4 where company_id is not null;
+explain select * from ice_orc where company_id is not null;
+set hive.fetch.task.conversion=more;
+
explain alter table ice_orc COMPACT 'major' and wait;
alter table ice_orc COMPACT 'major' and wait;
@@ -77,3 +99,7 @@ show compactions order by 'partition';
select `partition`, spec_id, content, record_count
from default.ice_orc.files
order by `partition`, spec_id, content, record_count;
+
+set hive.fetch.task.conversion=none;
+explain select * from ice_orc;
+explain select * from ice_orc where company_id is not null;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out
index adcf5844d65..10dffc77d9b 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out
@@ -54,6 +54,12 @@ POSTHOOK: query: insert into ice_orc VALUES (null,null,
null, null, null)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_orc
+PREHOOK: query: alter table ice_orc create tag v1
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc create tag v1
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@ice_orc
PREHOOK: query: alter table ice_orc set partition spec(company_id, dept_id)
PREHOOK: type: ALTERTABLE_SETPARTSPEC
PREHOOK: Input: default@ice_orc
@@ -93,6 +99,12 @@ POSTHOOK: query: insert into ice_orc VALUES (null,null,
null, null, null)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_orc
+PREHOOK: query: alter table ice_orc create tag v2
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc create tag v2
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@ice_orc
PREHOOK: query: alter table ice_orc set partition spec(company_id, dept_id,
team_id)
PREHOOK: type: ALTERTABLE_SETPARTSPEC
PREHOOK: Input: default@ice_orc
@@ -164,6 +176,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
POSTHOOK: Output: default@ice_orc
+PREHOOK: query: alter table ice_orc create tag v3
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc create tag v3
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@ice_orc
PREHOOK: query: alter table ice_orc set partition spec(company_id, dept_id)
PREHOOK: type: ALTERTABLE_SETPARTSPEC
PREHOOK: Input: default@ice_orc
@@ -219,6 +237,12 @@ POSTHOOK: query: delete from ice_orc where last_name in
('ln1a', 'ln8a')
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
#### A masked pattern was here ####
+PREHOOK: query: alter table ice_orc create tag v4
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc create tag v4
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@ice_orc
PREHOOK: query: select * from ice_orc
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_orc
@@ -334,6 +358,454 @@ POSTHOOK: Input: default@ice_orc
{"company_id":null,"dept_id":null,"team_id":null} 0 0 1
{"company_id":null,"dept_id":null,"team_id":null} 1 0 1
{"company_id":null,"dept_id":null,"team_id":null} 2 0 1
+PREHOOK: query: explain select * from default.ice_orc.tag_v1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Snapshot ref: tag_v1
+ Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 4 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 768 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Snapshot ref: tag_v2
+ Statistics: Num rows: 8 Data size: 1536 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 8 Data size: 1536 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 1536 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Snapshot ref: tag_v3
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Snapshot ref: tag_v4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v1 where company_id
is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v1 where company_id
is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Snapshot ref: tag_v1
+ Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: company_id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 576 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 3 Data size: 576 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 576 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v2 where company_id
is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v2 where company_id
is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Snapshot ref: tag_v2
+ Statistics: Num rows: 8 Data size: 1536 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: company_id is not null (type: boolean)
+ Statistics: Num rows: 7 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 7 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 1344 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v3 where company_id
is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v3 where company_id
is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Snapshot ref: tag_v3
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: company_id is not null (type: boolean)
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 4312 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from default.ice_orc.tag_v4 where company_id
is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from default.ice_orc.tag_v4 where company_id
is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Snapshot ref: tag_v4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: company_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from ice_orc where company_id is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from ice_orc where company_id is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: company_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 3528 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: explain alter table ice_orc COMPACT 'major' and wait
PREHOOK: type: ALTERTABLE_COMPACT
PREHOOK: Input: default@ice_orc
@@ -466,3 +938,88 @@ POSTHOOK: Input: default@ice_orc
{"company_id":100,"dept_id":2,"team_id":null} 1 0 3
{"company_id":null,"dept_id":null,"team_id":null} 1 0 1
{"company_id":null,"dept_id":null,"team_id":null} 1 0 2
+PREHOOK: query: explain select * from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ Statistics: Num rows: 9 Data size: 1592 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 9 Data size: 1592 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 1592 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from ice_orc where company_id is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from ice_orc where company_id is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ice_orc
+ filterExpr: company_id is not null (type: boolean)
+ Statistics: Num rows: 6 Data size: 1194 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: first_name (type: string), last_name (type:
string), dept_id (type: bigint), team_id (type: bigint), company_id (type:
bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6 Data size: 1194 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1194 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+