This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new ddcba0bef3a HIVE-28537: Iceberg: Compaction: Allow only partition columns in the WHERE clause (Zoltan Ratkai, reviewed by Denys Kuzmenko, Dmitriy Fingerman)
ddcba0bef3a is described below
commit ddcba0bef3a11beeb606792b21d025653e34d378
Author: Zoltan Ratkai <[email protected]>
AuthorDate: Fri Oct 25 09:41:16 2024 +0200
    HIVE-28537: Iceberg: Compaction: Allow only partition columns in the WHERE clause (Zoltan Ratkai, reviewed by Denys Kuzmenko, Dmitriy Fingerman)
Closes #5483
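    Illustration, drawn from the q-tests added in this change: the WHERE clause of a
    compaction request may now reference partition columns only; filtering on a
    regular column fails with error 10443.

        -- accepted: company_id and dept_id are partition columns of ice_orc
        alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);

        -- rejected: c is a regular column of iceberg_orc_compaction (partitioned by d)
        alter table iceberg_orc_compaction COMPACT 'major' and wait where c in ('text1', 'text2');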
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 1 +
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 4 +-
.../apache/iceberg/mr/hive/IcebergTableUtil.java | 8 ++
...ompaction_with_non_partition_column_in_filter.q | 11 ++
...action_partition_evolution_w_id_spec_w_filter.q | 19 +++-
...ction_with_non_partition_column_in_filter.q.out | 33 ++++++
...on_partition_evolution_w_id_spec_w_filter.q.out | 112 +++++++++++++++++----
.../storage/compact/AlterTableCompactAnalyzer.java | 9 +-
.../hive/ql/metadata/HiveStorageHandler.java | 2 +-
.../hive/ql/optimizer/ppr/PartitionPruner.java | 27 +++--
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 2 +-
11 files changed, 194 insertions(+), 34 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 1348ecb2c73..d3c85832b7a 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -492,6 +492,7 @@ public enum ErrorMsg {
   NONICEBERG_COMPACTION_WITH_FILTER_NOT_SUPPORTED(10440, "Compaction with filter is not allowed on non-Iceberg table {0}.{1}", true),
   ICEBERG_COMPACTION_WITH_PART_SPEC_AND_FILTER_NOT_SUPPORTED(10441, "Compaction command with both partition spec and filter is not supported on Iceberg table {0}.{1}", true),
   COMPACTION_THREAD_INITIALIZATION(10442, "Compaction thread failed during initialization", false),
+  ALTER_TABLE_COMPACTION_NON_PARTITIONED_COLUMN_NOT_ALLOWED(10443, "Filter expression can contain only partition columns."),
   //========================== 20000 range starts here ========================//
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 50280a8a0ab..b70bd15179f 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -2098,9 +2098,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
}
@Override
-  public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+  public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable, boolean latestSpecOnly) {
     Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
-    return IcebergTableUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId());
+ return IcebergTableUtil.getPartitionKeys(icebergTable, latestSpecOnly);
}
@Override
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 88dd006b772..773ae553b2b 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -484,6 +484,14 @@ public class IcebergTableUtil {
             String.format("Transform: %s", partField.transform().toString()))).collect(Collectors.toList());
   }
+  public static List<FieldSchema> getPartitionKeys(Table table, boolean latestSpecOnly) {
+    if (latestSpecOnly) {
+      return getPartitionKeys(table, table.spec().specId());
+    } else {
+      return table.specs().keySet().stream().flatMap(id -> getPartitionKeys(table, id).stream())
+          .distinct().collect(Collectors.toList());
+    }
+  }
public static List<PartitionField> getPartitionFields(Table table) {
return table.specs().values().stream().flatMap(spec -> spec.fields()
.stream()).distinct().collect(Collectors.toList());
diff --git a/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q b/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q
new file mode 100644
index 00000000000..52f9ea26158
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q
@@ -0,0 +1,11 @@
+set hive.llap.io.enabled=true;
+set hive.vectorized.execution.enabled=true;
+set hive.optimize.shared.work.merge.ts.schema=true;
+
+create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc;
+
+insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222);
+insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444);
+insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666);
+
+alter table iceberg_orc_compaction COMPACT 'major' and wait where c in ('text1', 'text2');
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
index 7d0576343ae..53e915d09ca 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q
@@ -65,11 +65,26 @@ delete from ice_orc where last_name in ('ln1', 'ln9');
delete from ice_orc where last_name in ('ln3', 'ln11');
delete from ice_orc where last_name in ('ln5', 'ln13');
+alter table ice_orc set partition spec(team_id);
+insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100);
+insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100);
+insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100);
+insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100);
+
+
select * from ice_orc;
describe formatted ice_orc;
-explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15');
-alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15');
+explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);
+alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2);
select * from ice_orc;
describe formatted ice_orc;
diff --git a/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out b/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out
new file mode 100644
index 00000000000..62c708a1478
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/negative/iceberg_major_compaction_with_non_partition_column_in_filter.q.out
@@ -0,0 +1,33 @@
+PREHOOK: query: create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: create table iceberg_orc_compaction (a int, b int, c string) partitioned by (d int) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (1, 11, "text1", 111),(2,22,"text2",222)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (3, 33, "text3", 333),(4,44,"text4",444)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+PREHOOK: query: insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@iceberg_orc_compaction
+POSTHOOK: query: insert into iceberg_orc_compaction values (5, 55, "text5", 555),(6,66,"text6",666)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@iceberg_orc_compaction
+FAILED: SemanticException [Error 10443]: Filter expression can contain only partition columns.
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
index 95a7ef33c91..7df4035b818 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out
@@ -149,6 +149,61 @@ POSTHOOK: query: delete from ice_orc where last_name in ('ln5', 'ln13')
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
+PREHOOK: query: alter table ice_orc set partition spec(team_id)
+PREHOOK: type: ALTERTABLE_SETPARTSPEC
+PREHOOK: Input: default@ice_orc
+POSTHOOK: query: alter table ice_orc set partition spec(team_id)
+POSTHOOK: type: ALTERTABLE_SETPARTSPEC
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn17', 'ln17', 1, 10, 100),
+ ('fn18','ln18', 1, 10, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn19','ln19', 2, 11, 100),
+ ('fn20','ln20', 2, 11, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn21','ln21', 3, 12, 100),
+ ('fn22','ln22', 3, 12, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: insert into ice_orc VALUES
+ ('fn23','ln23', 4, 13, 100),
+ ('fn24','ln24', 4, 13, 100)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc
PREHOOK: query: select * from ice_orc
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_orc
@@ -162,7 +217,15 @@ fn12 ln12 2 11 100
fn14 ln14 3 12 100
fn15 ln15 4 13 100
fn16 ln16 4 13 100
+fn17 ln17 1 10 100
+fn18 ln18 1 10 100
+fn19 ln19 2 11 100
fn2 ln2 1 10 100
+fn20 ln20 2 11 100
+fn21 ln21 3 12 100
+fn22 ln22 3 12 100
+fn23 ln23 4 13 100
+fn24 ln24 4 13 100
fn4 ln4 2 11 100
fn6 ln6 3 12 100
fn7 ln7 4 13 100
@@ -182,8 +245,7 @@ company_id bigint
# Partition Transform Information
# col_name transform_type
-company_id IDENTITY
-dept_id IDENTITY
+team_id IDENTITY
# Detailed Table Information
Database: default
@@ -192,24 +254,24 @@ Retention: 0
#### A masked pattern was here ####
Table Type: EXTERNAL_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"company_id\":\"true\",\"dept_id\":\"true\",\"first_name\":\"true\",\"last_name\":\"true\",\"team_id\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
	current-schema	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]}
current-snapshot-id #Masked#
-	current-snapshot-summary	{\"added-position-delete-files\":\"2\",\"added-delete-files\":\"2\",\"added-files-size\":\"#Masked#\",\"added-position-deletes\":\"2\",\"changed-partition-count\":\"2\",\"total-records\":\"16\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"6\",\"total-position-deletes\":\"6\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"}
+	current-snapshot-summary	{\"added-data-files\":\"1\",\"added-records\":\"2\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"24\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"12\",\"total-delete-files\":\"6\",\"total-position-deletes\":\"6\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"}
current-snapshot-timestamp-ms #Masked#
-	default-partition-spec	{\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]}
+	default-partition-spec	{\"spec-id\":2,\"fields\":[{\"name\":\"team_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]}
format-version 2
iceberg.orc.files.only true
#### A masked pattern was here ####
- numFiles 8
- numRows 10
+ numFiles 12
+ numRows 18
parquet.compression zstd
#### A masked pattern was here ####
rawDataSize 0
serialization.format 1
- snapshot-count 11
+ snapshot-count 15
	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
@@ -226,11 +288,11 @@ InputFormat:	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
Compressed: No
Sort Columns: []
-PREHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+PREHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
PREHOOK: type: ALTERTABLE_COMPACT
PREHOOK: Input: default@ice_orc
PREHOOK: Output: default@ice_orc
-POSTHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+POSTHOOK: query: explain alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
POSTHOOK: type: ALTERTABLE_COMPACT
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
@@ -246,11 +308,11 @@ STAGE PLANS:
table name: default.ice_orc
blocking: true
-PREHOOK: query: alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+PREHOOK: query: alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
PREHOOK: type: ALTERTABLE_COMPACT
PREHOOK: Input: default@ice_orc
PREHOOK: Output: default@ice_orc
-POSTHOOK: query: alter table ice_orc COMPACT 'major' and wait where team_id=10 or first_name in ('fn3', 'fn11') or last_name in ('ln7', 'ln15')
+POSTHOOK: query: alter table ice_orc COMPACT 'major' and wait where company_id=100 or dept_id in (1,2)
POSTHOOK: type: ALTERTABLE_COMPACT
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
@@ -267,7 +329,15 @@ fn12 ln12 2 11 100
fn14 ln14 3 12 100
fn15 ln15 4 13 100
fn16 ln16 4 13 100
+fn17 ln17 1 10 100
+fn18 ln18 1 10 100
+fn19 ln19 2 11 100
fn2 ln2 1 10 100
+fn20 ln20 2 11 100
+fn21 ln21 3 12 100
+fn22 ln22 3 12 100
+fn23 ln23 4 13 100
+fn24 ln24 4 13 100
fn4 ln4 2 11 100
fn6 ln6 3 12 100
fn7 ln7 4 13 100
@@ -287,8 +357,7 @@ company_id bigint
# Partition Transform Information
# col_name transform_type
-company_id IDENTITY
-dept_id IDENTITY
+team_id IDENTITY
# Detailed Table Information
Database: default
@@ -302,19 +371,19 @@ Table Parameters:
bucketing_version 2
	current-schema	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]}
current-snapshot-id #Masked#
-	current-snapshot-summary	{\"added-data-files\":\"4\",\"deleted-data-files\":\"4\",\"removed-position-delete-files\":\"3\",\"removed-delete-files\":\"3\",\"added-records\":\"5\",\"deleted-records\":\"8\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"3\",\"changed-partition-count\":\"5\",\"total-records\":\"11\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"1\",\"total-position-deletes\":\"1\",\ [...]
+	current-snapshot-summary	{\"added-data-files\":\"4\",\"deleted-data-files\":\"8\",\"removed-position-delete-files\":\"6\",\"removed-delete-files\":\"6\",\"added-records\":\"10\",\"deleted-records\":\"16\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"6\",\"changed-partition-count\":\"9\",\"total-records\":\"18\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"8\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\" [...]
current-snapshot-timestamp-ms #Masked#
-	default-partition-spec	{\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]}
+	default-partition-spec	{\"spec-id\":2,\"fields\":[{\"name\":\"team_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]}
format-version 2
iceberg.orc.files.only true
#### A masked pattern was here ####
numFiles 8
- numRows 10
+ numRows 18
parquet.compression zstd
#### A masked pattern was here ####
rawDataSize 0
serialization.format 1
- snapshot-count 15
+ snapshot-count 20
	storage_handler	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
@@ -336,7 +405,8 @@ PREHOOK: type: SHOW COMPACTIONS
POSTHOOK: query: show compactions order by 'partition'
POSTHOOK: type: SHOW COMPACTIONS
CompactionId	Database	Table	Partition	Type	State	Worker host	Worker	Enqueue Time	Start Time	Duration(ms)	HadoopJobId	Error message	Initiator host	Initiator	Pool name	TxnId	Next TxnId	Commit Time	Highest WriteId
-#Masked#	default	ice_orc	company_id=100/dept_id=1	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
-#Masked#	default	ice_orc	company_id=100/dept_id=2	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
-#Masked#	default	ice_orc	company_id=100/dept_id=4	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=10	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=11	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=12	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
+#Masked#	default	ice_orc	team_id=13	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
 #Masked#	default	ice_orc	 ---	MAJOR	succeeded	#Masked#	manual	default	0	0	0	 ---
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
index 69e0a77a2d5..6ce62ee9ecc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableAnalyzer;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.RowResolver;
@@ -93,8 +95,10 @@ public class AlterTableCompactAnalyzer extends AbstractAlterTableAnalyzer {
case HiveParser.TOK_WHERE:
RowResolver rwsch = new RowResolver();
Map<String, String> colTypes = new HashMap<>();
+ Table table;
try {
- for (FieldSchema fs : getDb().getTable(tableName).getCols()) {
+ table = getDb().getTable(tableName);
+ for (FieldSchema fs : table.getCols()) {
           TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType());
rwsch.put(tableName.getTable(), fs.getName(),
new ColumnInfo(fs.getName(), columnType, null, true));
@@ -106,6 +110,9 @@ public class AlterTableCompactAnalyzer extends AbstractAlterTableAnalyzer {
TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
ASTNode conds = (ASTNode) node.getChild(0);
filterExpr = ExprNodeTypeCheck.genExprNode(conds, tcCtx).get(conds);
+ if (!PartitionPruner.onlyContainsPartnCols(table, filterExpr)) {
+          throw new SemanticException(ErrorMsg.ALTER_TABLE_COMPACTION_NON_PARTITIONED_COLUMN_NOT_ALLOWED);
+ }
break;
default:
break;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 2b05837a884..f186e264687 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -796,7 +796,7 @@ public interface HiveStorageHandler extends Configurable {
SearchArgument searchArgument) {
return false;
}
-  default List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+  default List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable, boolean latestSpecOnly) {
     throw new UnsupportedOperationException("Storage handler does not support getting partition keys " +
"for a table.");
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 5ccf16af75f..1d1e07c66aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -27,6 +27,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -104,19 +105,19 @@ public class PartitionPruner extends Transform {
* if the table is not partitioned, the function always returns true.
* condition.
*
- * @param tab
+ * @param table
* the table object
* @param expr
* the pruner expression for the table
*/
- public static boolean onlyContainsPartnCols(Table tab, ExprNodeDesc expr) {
- if (!tab.isPartitioned() || (expr == null)) {
+ public static boolean onlyContainsPartnCols(Table table, ExprNodeDesc expr) {
+ if(!isPartitioned(table) || (expr == null)) {
return true;
}
if (expr instanceof ExprNodeColumnDesc) {
- String colName = ((ExprNodeColumnDesc) expr).getColumn();
- return tab.isPartitionKey(colName);
+ String columnName = ((ExprNodeColumnDesc) expr).getColumn();
+ return isPartitionKey(table, columnName);
}
// It cannot contain a non-deterministic function
@@ -130,7 +131,7 @@ public class PartitionPruner extends Transform {
List<ExprNodeDesc> children = expr.getChildren();
if (children != null) {
for (int i = 0; i < children.size(); i++) {
- if (!onlyContainsPartnCols(tab, children.get(i))) {
+ if (!onlyContainsPartnCols(table, children.get(i))) {
return false;
}
}
@@ -139,6 +140,20 @@ public class PartitionPruner extends Transform {
return true;
}
+  private static boolean isPartitioned(Table table) {
+    if (table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned()) {
+      return table.getStorageHandler().isPartitioned(table);
+    } else {
+      return table.isPartitioned();
+    }
+  }
+
+  private static boolean isPartitionKey(Table table, String columnName) {
+    List<String> partitionKeyNames = table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned() ?
+        table.getStorageHandler().getPartitionKeys(table, false).stream()
+            .map(FieldSchema::getName).collect(Collectors.toList()) : table.getPartColNames();
+    return partitionKeyNames.stream().anyMatch(item->item.equalsIgnoreCase(columnName));
+  }
/**
   * Get the partition list for the TS operator that satisfies the partition pruner
* condition.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 9cbd47123a0..df28d7992df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -580,7 +580,7 @@ public final class ParseUtils {
     String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME);
Map<String, String> colTypes = new HashMap<>();
     List<FieldSchema> partitionKeys = table.getStorageHandler() != null && table.getStorageHandler().alwaysUnpartitioned() ?
-        table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys();
+        table.getStorageHandler().getPartitionKeys(table, true) : table.getPartitionKeys();
for (FieldSchema fs : partitionKeys) {
colTypes.put(fs.getName().toLowerCase(), fs.getType());
}