This is an automated email from the ASF dual-hosted git repository.

sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new ffefb7daba4 HIVE-28903: Skip deleting archived path when drop partition/table (#5769)
ffefb7daba4 is described below

commit ffefb7daba454ee6559b1b92c6bc1fc6bc522094
Author: Wechar Yu <[email protected]>
AuthorDate: Tue May 6 21:09:04 2025 +0800

    HIVE-28903: Skip deleting archived path when drop partition/table (#5769)
---
 ql/src/test/queries/clientpositive/archive_drop.q  |  28 +++++
 .../results/clientpositive/llap/archive_drop.q.out | 122 +++++++++++++++++++++
 .../apache/hadoop/hive/metastore/Warehouse.java    |   5 +
 .../apache/hadoop/hive/metastore/HMSHandler.java   |  40 +++----
 4 files changed, 176 insertions(+), 19 deletions(-)

diff --git a/ql/src/test/queries/clientpositive/archive_drop.q b/ql/src/test/queries/clientpositive/archive_drop.q
new file mode 100644
index 00000000000..f60106f63ba
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/archive_drop.q
@@ -0,0 +1,28 @@
+set hive.mapred.mode=nonstrict;
+set hive.archive.enabled = true;
+
+create database test_db;
+
+create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string);
+
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom';
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry';
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike';
+
+show partitions test_db.test_tbl;
+
+alter table test_db.test_tbl archive partition (dt='2025-04-01');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;
+
+show partitions test_db.test_tbl;
+
+alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;
+
+show partitions test_db.test_tbl;
+
+select * from test_db.test_tbl;
+
+drop table test_db.test_tbl;
diff --git a/ql/src/test/results/clientpositive/llap/archive_drop.q.out b/ql/src/test/results/clientpositive/llap/archive_drop.q.out
new file mode 100644
index 00000000000..26e472009f0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/archive_drop.q.out
@@ -0,0 +1,122 @@
+PREHOOK: query: create database test_db
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:test_db
+POSTHOOK: query: create database test_db
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:test_db
+PREHOOK: query: create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:test_db
+PREHOOK: Output: test_db@test_tbl
+POSTHOOK: query: create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:test_db
+POSTHOOK: Output: test_db@test_tbl
+PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).name SIMPLE []
+PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).name SIMPLE []
+PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).name SIMPLE []
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=12
+dt=2025-04-01/hr=13
+PREHOOK: query: alter table test_db.test_tbl archive partition (dt='2025-04-01')
+PREHOOK: type: ALTERTABLE_ARCHIVE
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: query: alter table test_db.test_tbl archive partition (dt='2025-04-01')
+POSTHOOK: type: ALTERTABLE_ARCHIVE
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+Found 1 items
+#### A masked pattern was here ####
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=12
+dt=2025-04-01/hr=13
+PREHOOK: query: alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: query: alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+Found 1 items
+#### A masked pattern was here ####
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=13
+PREHOOK: query: select * from test_db.test_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
+PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_db.test_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
+#### A masked pattern was here ####
+1      tom     2025-04-01      11
+3      spike   2025-04-01      13
+PREHOOK: query: drop table test_db.test_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: database:test_db
+PREHOOK: Output: test_db@test_tbl
+POSTHOOK: query: drop table test_db.test_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: database:test_db
+POSTHOOK: Output: test_db@test_tbl
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index 9fd9370a2a4..bbb15191cc5 100755
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.HarFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType;
@@ -467,6 +468,10 @@ public boolean deleteDir(Path f, boolean recursive, boolean ifPurge, boolean nee
       }
     }
     FileSystem fs = getFs(f);
+    if (fs instanceof HarFileSystem) {
+      LOG.warn("Har path {} is not supported to delete, skipping it.", f);
+      return true;
+    }
     return fsHandler.deleteDir(fs, f, recursive, ifPurge, conf);
   }
 
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
index b106c804fa1..55a1193f92c 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
@@ -3179,8 +3179,7 @@ private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldE
         wh.deleteDir(path, true, ifPurge, shouldEnableCm);
       }
     } catch (Exception e) {
-      LOG.error("Failed to delete directory: " + path +
-          " " + e.getMessage());
+      LOG.error("Failed to delete directory: {}", path, e);
     }
   }
 
@@ -5134,14 +5133,15 @@ private boolean drop_partition_common(RawStore ms, String catName, String db_nam
         throw new NoSuchObjectException("Partition doesn't exist. " + part_vals);
       }
       isArchived = MetaStoreUtils.isArchived(part);
-      if (tableDataShouldBeDeleted && isArchived) {
-        archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
-        verifyIsWritablePath(archiveParentDir);
-      }
-
-      if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
-        partPath = new Path(part.getSd().getLocation());
-        verifyIsWritablePath(partPath);
+      if (tableDataShouldBeDeleted) {
+        if (isArchived) {
+          // Archived partition is only able to delete original location.
+          archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+          verifyIsWritablePath(archiveParentDir);
+        } else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
+          partPath = new Path(part.getSd().getLocation());
+          verifyIsWritablePath(partPath);
+        }
       }
 
       String partName = Warehouse.makePartName(tbl.getPartitionKeys(), part_vals);
@@ -5381,15 +5381,17 @@ public DropPartitionsResult drop_partitions_req(
         if (colNames != null) {
           partNames.add(FileUtils.makePartName(colNames, part.getValues()));
         }
-        if (tableDataShouldBeDeleted && MetaStoreUtils.isArchived(part)) {
-          Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
-          verifyIsWritablePath(archiveParentDir);
-          archToDelete.add(archiveParentDir);
-        }
-        if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
-          Path partPath = new Path(part.getSd().getLocation());
-          verifyIsWritablePath(partPath);
-          dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
+        if (tableDataShouldBeDeleted) {
+          if (MetaStoreUtils.isArchived(part)) {
+            // Archived partition is only able to delete original location.
+            Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+            verifyIsWritablePath(archiveParentDir);
+            archToDelete.add(archiveParentDir);
+          } else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
+            Path partPath = new Path(part.getSd().getLocation());
+            verifyIsWritablePath(partPath);
+            dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
+          }
         }
       }
 

Reply via email to