This is an automated email from the ASF dual-hosted git repository.
sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ffefb7daba4 HIVE-28903: Skip deleting archived path when drop partition/table (#5769)
ffefb7daba4 is described below
commit ffefb7daba454ee6559b1b92c6bc1fc6bc522094
Author: Wechar Yu <[email protected]>
AuthorDate: Tue May 6 21:09:04 2025 +0800
HIVE-28903: Skip deleting archived path when drop partition/table (#5769)
---
ql/src/test/queries/clientpositive/archive_drop.q | 28 +++++
.../results/clientpositive/llap/archive_drop.q.out | 122 +++++++++++++++++++++
.../apache/hadoop/hive/metastore/Warehouse.java | 5 +
.../apache/hadoop/hive/metastore/HMSHandler.java | 40 +++----
4 files changed, 176 insertions(+), 19 deletions(-)
diff --git a/ql/src/test/queries/clientpositive/archive_drop.q b/ql/src/test/queries/clientpositive/archive_drop.q
new file mode 100644
index 00000000000..f60106f63ba
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/archive_drop.q
@@ -0,0 +1,28 @@
+set hive.mapred.mode=nonstrict;
+set hive.archive.enabled = true;
+
+create database test_db;
+
+create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string);
+
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom';
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry';
+insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike';
+
+show partitions test_db.test_tbl;
+
+alter table test_db.test_tbl archive partition (dt='2025-04-01');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;
+
+show partitions test_db.test_tbl;
+
+alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;
+
+show partitions test_db.test_tbl;
+
+select * from test_db.test_tbl;
+
+drop table test_db.test_tbl;
diff --git a/ql/src/test/results/clientpositive/llap/archive_drop.q.out b/ql/src/test/results/clientpositive/llap/archive_drop.q.out
new file mode 100644
index 00000000000..26e472009f0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/archive_drop.q.out
@@ -0,0 +1,122 @@
+PREHOOK: query: create database test_db
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:test_db
+POSTHOOK: query: create database test_db
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:test_db
+PREHOOK: query: create table test_db.test_tbl (id int, name string)
partitioned by (dt date, hr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:test_db
+PREHOOK: Output: test_db@test_tbl
+POSTHOOK: query: create table test_db.test_tbl (id int, name string)
partitioned by (dt date, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:test_db
+POSTHOOK: Output: test_db@test_tbl
+PREHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='11') select 1, 'tom'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='11') select 1, 'tom'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).name SIMPLE []
+PREHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='12') select 2, 'jerry'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='12') select 2, 'jerry'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).name SIMPLE []
+PREHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='13') select 3, 'spike'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: query: insert overwrite table test_db.test_tbl partition
(dt='2025-04-01', hr='13') select 3, 'spike'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).id SIMPLE []
+POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).name SIMPLE []
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=12
+dt=2025-04-01/hr=13
+PREHOOK: query: alter table test_db.test_tbl archive partition
(dt='2025-04-01')
+PREHOOK: type: ALTERTABLE_ARCHIVE
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+POSTHOOK: query: alter table test_db.test_tbl archive partition
(dt='2025-04-01')
+POSTHOOK: type: ALTERTABLE_ARCHIVE
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
+Found 1 items
+#### A masked pattern was here ####
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=12
+dt=2025-04-01/hr=13
+PREHOOK: query: alter table test_db.test_tbl drop partition
(dt='2025-04-01',hr='12')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+POSTHOOK: query: alter table test_db.test_tbl drop partition
(dt='2025-04-01',hr='12')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
+Found 1 items
+#### A masked pattern was here ####
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: show partitions test_db.test_tbl
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: test_db@test_tbl
+POSTHOOK: query: show partitions test_db.test_tbl
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: test_db@test_tbl
+dt=2025-04-01/hr=11
+dt=2025-04-01/hr=13
+PREHOOK: query: select * from test_db.test_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
+PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_db.test_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
+POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
+#### A masked pattern was here ####
+1 tom 2025-04-01 11
+3 spike 2025-04-01 13
+PREHOOK: query: drop table test_db.test_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: test_db@test_tbl
+PREHOOK: Output: database:test_db
+PREHOOK: Output: test_db@test_tbl
+POSTHOOK: query: drop table test_db.test_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: test_db@test_tbl
+POSTHOOK: Output: database:test_db
+POSTHOOK: Output: test_db@test_tbl
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index 9fd9370a2a4..bbb15191cc5 100755
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -47,6 +47,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType;
@@ -467,6 +468,10 @@ public boolean deleteDir(Path f, boolean recursive, boolean ifPurge, boolean nee
}
}
FileSystem fs = getFs(f);
+ if (fs instanceof HarFileSystem) {
+ LOG.warn("Har path {} is not supported to delete, skipping it.", f);
+ return true;
+ }
return fsHandler.deleteDir(fs, f, recursive, ifPurge, conf);
}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
index b106c804fa1..55a1193f92c 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
@@ -3179,8 +3179,7 @@ private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldE
wh.deleteDir(path, true, ifPurge, shouldEnableCm);
}
} catch (Exception e) {
- LOG.error("Failed to delete directory: " + path +
- " " + e.getMessage());
+ LOG.error("Failed to delete directory: {}", path, e);
}
}
@@ -5134,14 +5133,15 @@ private boolean drop_partition_common(RawStore ms, String catName, String db_nam
throw new NoSuchObjectException("Partition doesn't exist. " + part_vals);
}
isArchived = MetaStoreUtils.isArchived(part);
- if (tableDataShouldBeDeleted && isArchived) {
- archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
- verifyIsWritablePath(archiveParentDir);
- }
-
- if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
- partPath = new Path(part.getSd().getLocation());
- verifyIsWritablePath(partPath);
+ if (tableDataShouldBeDeleted) {
+ if (isArchived) {
+ // Archived partition is only able to delete original location.
+ archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+ verifyIsWritablePath(archiveParentDir);
+ } else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
+ partPath = new Path(part.getSd().getLocation());
+ verifyIsWritablePath(partPath);
+ }
}
String partName = Warehouse.makePartName(tbl.getPartitionKeys(), part_vals);
@@ -5381,15 +5381,17 @@ public DropPartitionsResult drop_partitions_req(
if (colNames != null) {
partNames.add(FileUtils.makePartName(colNames, part.getValues()));
}
- if (tableDataShouldBeDeleted && MetaStoreUtils.isArchived(part)) {
- Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
- verifyIsWritablePath(archiveParentDir);
- archToDelete.add(archiveParentDir);
- }
- if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
- Path partPath = new Path(part.getSd().getLocation());
- verifyIsWritablePath(partPath);
- dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
+ if (tableDataShouldBeDeleted) {
+ if (MetaStoreUtils.isArchived(part)) {
+ // Archived partition is only able to delete original location.
+ Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+ verifyIsWritablePath(archiveParentDir);
+ archToDelete.add(archiveParentDir);
+ } else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
+ Path partPath = new Path(part.getSd().getLocation());
+ verifyIsWritablePath(partPath);
+ dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
+ }
}
}