This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new ddcba0bef3a HIVE-28537: Iceberg: Compaction: Allow only partition columns in the WHERE clause (Zoltan Ratkai, reviewed by Denys Kuzmenko, Dmitriy Fingerman)
ddcba0bef3a is described below
commit ddcba0bef3a11beeb606792b21d025653e34d378
Author: Zoltan Ratkai <[email protected]>
AuthorDate: Fri Oct 25 09:41:16 2024 +0200
    HIVE-28537: Iceberg: Compaction: Allow only partition columns in the WHERE clause (Zoltan Ratkai, reviewed by Denys Kuzmenko, Dmitriy Fingerman)
Closes #5483
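    Illustration, drawn from the q-tests added in this change: the WHERE clause of a
    compaction request may now reference partition columns only; filtering on a
    regular column fails with error 10443.

        -- accepted: company_id and dept_id are partition columns of ice_orc
        alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);

        -- rejected: c is a regular column of iceberg_orc_compaction (partitioned by d)
        alter table iceberg_orc_compaction COMPACT 'major' and wait where c in ('text1', 'text2');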
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 1 +
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 4 +-
.../apache/iceberg/mr/hive/IcebergTableUtil.java | 8 ++
...ompaction_with_non_partition_column_in_filter.q | 11 ++
...action_partition_evolution_w_id_spec_w_filter.q | 19 +++-
...ction_with_non_partition_column_in_filter.q.out | 33 ++++++
...on_partition_evolution_w_id_spec_w_filter.q.out | 112 +++++++++++++++++----
.../storage/compact/AlterTableCompactAnalyzer.java | 9 +-
.../hive/ql/metadata/HiveStorageHandler.java | 2 +-
.../hive/ql/optimizer/ppr/PartitionPruner.java | 27 +++--
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 2 +-
11 files changed, 194 insertions(+), 34 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 1348ecb2c73..d3c85832b7a 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -492,6 +492,7 @@ public enum ErrorMsg {
   NONICEBERG_COMPACTION_WITH_FILTER_NOT_SUPPORTED(10440, "Compaction with filter is not allowed on non-Iceberg table {0}.{1}", true),
   ICEBERG_COMPACTION_WITH_PART_SPEC_AND_FILTER_NOT_SUPPORTED(10441, "Compaction command with both partition spec and filter is not supported on Iceberg table {0}.{1}", true),
   COMPACTION_THREAD_INITIALIZATION(10442, "Compaction thread failed during initialization", false),
+  ALTER_TABLE_COMPACTION_NON_PARTITIONED_COLUMN_NOT_ALLOWED(10443, "Filter expression can contain only partition columns."),
   //========================== 20000 range starts here ========================//
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 50280a8a0ab..b70bd15179f 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -2098,9 +2098,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
}
@Override
-  public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+  public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable, boolean latestSpecOnly) {
     Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
-    return IcebergTableUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId());
+ return IcebergTableUtil.getPartitionKeys(icebergTable, latestSpecOnly);
}
@Override
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 88dd006b772..773ae553b2b 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -484,6 +484,14 @@ public class IcebergTableUtil {
             String.format("Transform: %s", partField.transform().toString()))).collect(Collectors.toList());
   }
+  public static List<FieldSchema> getPartitionKeys(Table table, boolean latestSpecOnly) {
+    if (latestSpecOnly) {
+      return getPartitionKeys(table, table.spec().specId());
+    } else {
+      return table.specs().keySet().stream().flatMap(id -> getPartitionKeys(table, id).stream())
+          .distinct().collect(Collectors.toList());
+    }
+  }
public static List<PartitionField> getPartitionFields(Table table) {
return table.specs().values().stream().flatMap(spec -> spec.fields()
.stream()).distinct().collect(Collectors.toList());
diff --git a/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q b/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q
new file mode 100644
index 00000000000..52f9ea26158
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q
@@ -0,0 +1,11 @@
+set hive.llap.io.enabled=true;
+set hive.vectorized.execution.enabled=true;
+set hive.optimize.shared.work.merge.ts.schema=true;
+
+create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc;
+
+insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222);
+insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444);
+insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666);
+
+alter table iceberg_orc_compaction COMPACT 'major' and wait where c in ('text1', 'text2');
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
index 7d0576343ae..53e915d09ca 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
@@ -65,11 +65,26 @@ delete from ice_orc where last_name in ('ln1', 'ln9');
delete from ice_orc where last_name in ('ln3', 'ln11');
delete from ice_orc where last_name in ('ln5', 'ln13');
+alter table ice_orc set partition spec(team_id);
+insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100);
+insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100);
+insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100);
+insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100);
+
+
select * from ice_orc;
describe formatted ice_orc;
-explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15');
-alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15');
+explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);
+alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);
select * from ice_orc;
describe formatted ice_orc;
diff --git a/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out b/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out
new file mode 100644
index 00000000000..62c708a1478
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out
@@ -0,0 +1,33 @@
+PREHOOK: query: create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+FAILED: SemanticException [Error 10443]: Filter expression can contain only partition columns.
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
index 95a7ef33c91..7df4035b818 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
@@ -149,6 +149,61 @@ POSTHOOK: query: delete from ice_orc where last_name in ('ln5', 'ln13')
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
+PREHOOK: query: alter table ice_orc set partition spec(team_id)
+PREHOOK: type: ALTERTABLE_SETPARTSPEC
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc set partition spec(team_id)
+POSTHOOK: type: ALTERTABLE_SETPARTSPEC
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
PREHOOK: query: select * from ice_orc
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_orc
@@ -162,7 +217,15 @@ fn12 ln12 2 11 100
fn14 ln14 3 12 100
fn15 ln15 4 13 100
fn16 ln16 4 13 100
+fn17 ln17 1 10 100
+fn18 ln18 1 10 100
+fn19 ln19 2 11 100
fn2 ln2 1 10 100
+fn20 ln20 2 11 100
+fn21 ln21 3 12 100
+fn22 ln22 3 12 100
+fn23 ln23 4 13 100
+fn24 ln24 4 13 100
fn4 ln4 2 11 100
fn6 ln6 3 12 100
fn7 ln7 4 13 100
@@ -182,8 +245,7 @@ company_id bigint
# Partition Transform Information
# col_name transform_type
-company_id IDENTITY
-dept_id IDENTITY
+team_id IDENTITY
# Detailed Table Information
Database: default
@@ -192,24 +254,24 @@ Retention: 0
#### A masked pattern was here ####
Table Type: EXTERNAL_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"company_id\":\"true\",\"dept_id\":\"true\",\"first_name\":\"true\",\"last_name\":\"true\",\"team_id\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
	current-schema	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]}
current-snapshot-id #Masked#
-	current-snapshot-summary	{\"added-position-delete-files\":\"2\",\"added-delete-files\":\"2\",\"added-files-size\":\"#Masked#\",\"added-position-deletes\":\"2\",\"changed-partition-count\":\"2\",\"total-records\":\"16\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"6\",\"total-position-deletes\":\"6\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"}
+	current-snapshot-summary	{\"added-data-files\":\"1\",\"added-records\":\"2\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"24\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"12\",\"total-delete-files\":\"6\",\"total-position-deletes\":\"6\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"}
current-snapshot-timestamp-ms #Masked#
-	default-partition-spec	{\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]}
+	default-partition-spec	{\"spec-id\":2,\"fields\":[{\"name\":\"team_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]}
format-version 2
iceberg.orc.files.only true
#### A masked pattern was here ####
- numFiles 8
- numRows 10
+ numFiles 12
+ numRows 18
parquet.compression zstd
#### A masked pattern was here ####
rawDataSize 0
serialization.format 1
- snapshot-count 11
+ snapshot-count 15
	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
@@ -226,11 +288,11 @@ InputFormat:	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
Compressed: No
Sort Columns: []
-PREHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+PREHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
PREHOOK: type: ALTERTABLE_COMPACT
PREHOOK: Input: default@ice_orc
PREHOOK: Output: default@ice_orc
-POSTHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+POSTHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
POSTHOOK: type: ALTERTABLE_COMPACT
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
@@ -246,11 +308,11 @@ STAGE PLANS:
table name: default.ice_orc
blocking: true
-PREHOOK: query: alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+PREHOOK: query: alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
PREHOOK: type: ALTERTABLE_COMPACT
PREHOOK: Input: default@ice_orc
PREHOOK: Output: default@ice_orc
-POSTHOOK: query: alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+POSTHOOK: query: alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
POSTHOOK: type: ALTERTABLE_COMPACT
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
@@ -267,7 +329,15 @@ fn12 ln12 2 11 100
fn14 ln14 3 12 100
fn15 ln15 4 13 100
fn16 ln16 4 13 100
+fn17 ln17 1 10 100
+fn18 ln18 1 10 100
+fn19 ln19 2 11 100
fn2 ln2 1 10 100
+fn20 ln20 2 11 100
+fn21 ln21 3 12 100
+fn22 ln22 3 12 100
+fn23 ln23 4 13 100
+fn24 ln24 4 13 100
fn4 ln4 2 11 100
fn6 ln6 3 12 100
fn7 ln7 4 13 100
@@ -287,8 +357,7 @@ company_id bigint
# Partition Transform Information
# col_name transform_type
-company_id IDENTITY
-dept_id IDENTITY
+team_id IDENTITY
# Detailed Table Information
Database: default
@@ -302,19 +371,19 @@ Table Parameters:
bucketing_version 2
	current-schema	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]}
current-snapshot-id #Masked#
-	current-snapshot-summary	{\"added-data-files\":\"4\",\"deleted-data-files\":\"4\",\"removed-position-delete-files\":\"3\",\"removed-delete-files\":\"3\",\"added-records\":\"5\",\"deleted-records\":\"8\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"3\",\"changed-partition-count\":\"5\",\"total-records\":\"11\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"1\",\"total-position-deletes\":\"1\",\ [...]
+	current-snapshot-summary	{\"added-data-files\":\"4\",\"deleted-data-files\":\"8\",\"removed-position-delete-files\":\"6\",\"removed-delete-files\":\"6\",\"added-records\":\"10\",\"deleted-records\":\"16\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"6\",\"changed-partition-count\":\"9\",\"total-records\":\"18\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\" [...]
current-snapshot-timestamp-ms #Masked#
-	default-partition-spec	{\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]}
+	default-partition-spec	{\"spec-id\":2,\"fields\":[{\"name\":\"team_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]}
format-version 2
iceberg.orc.files.only true
#### A masked pattern was here ####
numFiles 8
- numRows 10
+ numRows 18
parquet.compression zstd
#### A masked pattern was here ####
rawDataSize 0
serialization.format 1
- snapshot-count 15
+ snapshot-count 20
	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
@@ -336,7 +405,8 @@ PREHOOK: type: SHOW COMPACTIONS
POSTHOOK: query: show compactions order by 'partition'
POSTHOOK: type: SHOW COMPACTIONS
CompactionId	Database	Table	Partition	Type	State	Worker host	Worker	Enqueue Time	Start Time	Duration(ms)	HadoopJobId	Error message	Initiator host	Initiator	Pool name	TxnId	Next TxnId	Commit Time	Highest WriteId
-#Masked#	default	ice_orc	company_id=100/dept_id=1	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
-#Masked#	default	ice_orc	company_id=100/dept_id=2	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
-#Masked#	default	ice_orc	company_id=100/dept_id=4	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=10	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=11	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=12	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=13	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
 #Masked#	default	ice_orc	 ---	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
index 69e0a77a2d5..6ce62ee9ecc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableAnalyzer;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.RowResolver;
@@ -93,8 +95,10 @@ public class AlterTableCompactAnalyzer extends AbstractAlterTableAnalyzer {
case HiveParser.TOK_WHERE:
RowResolver rwsch = new RowResolver();
Map<String, String> colTypes = new HashMap<>();
+ Table table;
try {
- for (FieldSchema fs : getDb().getTable(tableName).getCols()) {
+ table = getDb().getTable(tableName);
+ for (FieldSchema fs : table.getCols()) {
           TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType());
rwsch.put(tableName.getTable(), fs.getName(),
new ColumnInfo(fs.getName(), columnType, null, true));
@@ -106,6 +110,9 @@ public class AlterTableCompactAnalyzer extends AbstractAlterTableAnalyzer {
TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
ASTNode conds = (ASTNode) node.getChild(0);
filterExpr = ExprNodeTypeCheck.genExprNode(conds, tcCtx).get(conds);
+ if (!PartitionPruner.onlyContainsPartnCols(table, filterExpr)) {
+          throw new SemanticException(ErrorMsg.ALTER_TABLE_COMPACTION_NON_PARTITIONED_COLUMN_NOT_ALLOWED);
+ }
break;
default:
break;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 2b05837a884..f186e264687 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -796,7 +796,7 @@ public interface HiveStorageHandler extends Configurable {
SearchArgument searchArgument) {
return false;
}
-  default List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+  default List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable, boolean latestSpecOnly) {
     throw new UnsupportedOperationException("Storage handler does not support getting partition keys " +
"for a table.");
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 5ccf16af75f..1d1e07c66aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -27,6 +27,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -104,19 +105,19 @@ public class PartitionPruner extends Transform {
* if the table is not partitioned, the function always returns true.
* condition.
*
- * @param tab
+ * @param table
* the table object
* @param expr
* the pruner expression for the table
*/
- public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) {
- if (!tab.isPartitioned() || (expr == null)) {
+ public static boolean onlyContainsPartnCols(Table table, ExprNodeDesc expr) {
+ if(!isPartitioned(table) || (expr == null)) {
return true;
}
if (expr instanceof ExprNodeColumnDesc) {
- String colName = ((ExprNodeColumnDesc) expr).getColumn();
- return tab.isPartitionKey(colName);
+ String columnName = ((ExprNodeColumnDesc) expr).getColumn();
+ return isPartitionKey(table, columnName);
}
// It cannot contain a non-deterministic function
@@ -130,7 +131,7 @@ public class PartitionPruner extends Transform {
List<ExprNodeDesc> children = expr.getChildren();
if (children != null) {
for (int i = 0; i < children.size(); i++) {
- if (!onlyContainsPartnCols(tab, children.get(i))) {
+ if (!onlyContainsPartnCols(table, children.get(i))) {
return false;
}
}
@@ -139,6 +140,20 @@ public class PartitionPruner extends Transform {
return true;
}
+  private static boolean isPartitioned(Table table) {
+    if (table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned()) {
+      return table.getStorageHandler().isPartitioned(table);
+    } else {
+      return table.isPartitioned();
+    }
+  }
+
+  private static boolean isPartitionKey(Table table, String columnName) {
+    List<String> partitionKeyNames = table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned() ?
+        table.getStorageHandler().getPartitionKeys(table, false).stream()
+            .map(FieldSchema::getName).collect(Collectors.toList()) : table.getPartColNames();
+    return partitionKeyNames.stream().anyMatch(item->item.equalsIgnoreCase(columnName));
+  }
/**
   * Get the partition list for the TS operator that satisfies the partition pruner
* condition.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 9cbd47123a0..df28d7992df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -580,7 +580,7 @@ public final class ParseUtils {
     String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME);
Map<String, String> colTypes = new HashMap<>();
     List<FieldSchema> partitionKeys = table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned() ?
-        table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys();
+        table.getStorageHandler().getPartitionKeys(table, true) : table.getPartitionKeys();
for (FieldSchema fs : partitionKeys) {
colTypes.put(fs.getName().toLowerCase(), fs.getType());
}