This is an automated email from the ASF dual-hosted git repository.
taklwu pushed a commit to branch HBASE-28957
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/HBASE-28957 by this push:
new 0b1ec92e2de HBASE-29524 Handle bulk-loaded HFiles in delete and cleanup process (#7239)
0b1ec92e2de is described below
commit 0b1ec92e2de6ddbb2d0c28e5bf67a12dd7880e7d
Author: vinayak hegde <[email protected]>
AuthorDate: Wed Aug 27 03:55:29 2025 +0530
HBASE-29524 Handle bulk-loaded HFiles in delete and cleanup process (#7239)
Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
Reviewed by: Kota-SH <[email protected]>
---
.../hadoop/hbase/backup/impl/BackupCommands.java | 16 +++++++++++++++-
.../hbase/backup/TestBackupDeleteWithCleanup.java | 19 ++++++++++++++++---
.../hadoop/hbase/backup/impl/TestBackupCommands.java | 7 ++++++-
3 files changed, 37 insertions(+), 5 deletions(-)
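
Editor's note: the BackupCommands change below extends the force-delete path so the bulk-load file area is purged alongside the WAL area: list the directory's children and recursively delete each one, leaving the parent in place. A minimal standalone sketch of that pattern (the helper name deleteChildren is ours; fs and the directory handles follow the patch):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    final class BulkLoadCleanupSketch {
      // Recursively deletes every child of dir, leaving dir itself in place,
      // mirroring the loop added to BackupCommands below.
      static void deleteChildren(FileSystem fs, Path dir) throws IOException {
        if (!fs.exists(dir)) {
          return; // nothing to clean up
        }
        for (FileStatus child : fs.listStatus(dir)) {
          fs.delete(child.getPath(), true); // recursive delete of each child
        }
      }
    }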
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 3ae97c487ef..0b02a5edd89 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -1004,6 +1004,7 @@ public final class BackupCommands {
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
FileSystem fs = manager.getBackupFs();
Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
// Delete contents under WAL directory
if (fs.exists(walDir)) {
@@ -1014,6 +1015,15 @@ public final class BackupCommands {
System.out.println("Deleted all contents under WAL directory: " +
walDir);
}
+ // Delete contents under bulk load directory
+ if (fs.exists(bulkloadDir)) {
+ FileStatus[] bulkContents = fs.listStatus(bulkloadDir);
+ for (FileStatus item : bulkContents) {
+ fs.delete(item.getPath(), true); // recursive delete of each child
+ }
+ System.out.println("Deleted all contents under Bulk Load directory:
" + bulkloadDir);
+ }
+
} catch (IOException e) {
System.out.println("WARNING: Failed to delete contents under backup
directories: "
+ backupWalDir + ". Error: " + e.getMessage());
@@ -1022,7 +1032,7 @@ public final class BackupCommands {
}
/**
- * Cleans up old WAL files based on the determined cutoff timestamp.
+ * Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
*/
void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
throws IOException {
@@ -1033,6 +1043,7 @@ public final class BackupCommands {
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
FileSystem fs = manager.getBackupFs();
Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -1058,6 +1069,9 @@ public final class BackupCommands {
if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
System.out.println("Deleting outdated WAL directory: " + dirPath);
fs.delete(dirPath, true);
+ Path bulkloadPath = new Path(bulkloadDir, dirName);
+ System.out.println("Deleting corresponding bulk-load directory: "
+ bulkloadPath);
+ fs.delete(bulkloadPath, true);
}
} catch (ParseException e) {
System.out.println("WARNING: Failed to parse directory name '" +
dirName
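
Editor's note: the retention logic in deleteOldWALFiles works on day-granular directory names. A day directory is removed only when the entire day it names ends before the cutoff, and the same-named directory under the bulk-load tree is removed with it. A simplified sketch of the decision (assuming DATE_FORMAT is a day-granularity pattern such as yyyy-MM-dd; the real constant lives in ContinuousBackupReplicationEndpoint):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.TimeZone;

    final class CutoffSketch {
      static final long ONE_DAY_IN_MILLISECONDS = 24L * 60 * 60 * 1000;

      // True when the whole day named by dirName ends before cutoffTime,
      // i.e. the directory holds only data older than the cutoff.
      static boolean isOutdated(String dirName, long cutoffTime) throws ParseException {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); // assumed DATE_FORMAT
        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        long dayStart = dateFormat.parse(dirName).getTime();
        return dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime;
      }
    }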
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
index d22f4c9cda9..07c9110072b 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.backup;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -164,7 +165,7 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase {
// Step 6: Verify that the backup WAL directory is empty
assertTrue("WAL backup directory should be empty after force delete",
- isWalDirsEmpty(conf1, backupWalDir.toString()));
+ areWalAndBulkloadDirsEmpty(conf1, backupWalDir.toString()));
// Step 7: Take new full backup with continuous backup enabled
String backupIdContinuous = fullTableBackupWithContinuous(Lists.newArrayList(table1));
@@ -189,28 +190,35 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase {
public static void setupBackupFolders(FileSystem fs, Path backupWalDir, long currentTime)
throws IOException {
Path walsDir = new Path(backupWalDir, WALS_DIR);
+ Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
fs.mkdirs(walsDir);
+ fs.mkdirs(bulkLoadDir);
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
for (int i = 0; i < 5; i++) {
String dateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
fs.mkdirs(new Path(walsDir, dateStr));
+ fs.mkdirs(new Path(bulkLoadDir, dateStr));
}
}
private static void verifyBackupCleanup(FileSystem fs, Path backupWalDir, long currentTime)
throws IOException {
Path walsDir = new Path(backupWalDir, WALS_DIR);
+ Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
// Expect folders older than 3 days to be deleted
for (int i = 3; i < 5; i++) {
String oldDateStr = dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
Path walPath = new Path(walsDir, oldDateStr);
+ Path bulkLoadPath = new Path(bulkLoadDir, oldDateStr);
assertFalse("Old WAL directory (" + walPath + ") should be deleted, but
it exists!",
fs.exists(walPath));
+ assertFalse("Old BulkLoad directory (" + bulkLoadPath + ") should be
deleted, but it exists!",
+ fs.exists(bulkLoadPath));
}
// Expect folders within the last 3 days to exist
@@ -218,9 +226,13 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase {
String recentDateStr =
dateFormat.format(new Date(currentTime - (i * ONE_DAY_IN_MILLISECONDS)));
Path walPath = new Path(walsDir, recentDateStr);
+ Path bulkLoadPath = new Path(bulkLoadDir, recentDateStr);
assertTrue("Recent WAL directory (" + walPath + ") should exist, but it
is missing!",
fs.exists(walPath));
+ assertTrue(
+ "Recent BulkLoad directory (" + bulkLoadPath + ") should exist, but it
is missing!",
+ fs.exists(bulkLoadPath));
}
}
@@ -264,15 +276,16 @@ public class TestBackupDeleteWithCleanup extends TestBackupBase {
peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && peer.isEnabled());
}
- private static boolean isWalDirsEmpty(Configuration conf, String backupWalDir)
+ private static boolean areWalAndBulkloadDirsEmpty(Configuration conf, String backupWalDir)
throws IOException {
BackupFileSystemManager manager =
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
FileSystem fs = manager.getBackupFs();
Path walDir = manager.getWalsDir();
+ Path bulkloadDir = manager.getBulkLoadFilesDir();
- return isDirectoryEmpty(fs, walDir);
+ return isDirectoryEmpty(fs, walDir) && isDirectoryEmpty(fs, bulkloadDir);
}
private static boolean isDirectoryEmpty(FileSystem fs, Path dirPath) throws IOException {
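
Editor's note: areWalAndBulkloadDirsEmpty now simply ANDs the emptiness checks for both managed directories. The body of isDirectoryEmpty is not part of this hunk; a plausible implementation, shown purely as an assumption, is:

    // Hypothetical body for isDirectoryEmpty (not shown in this diff):
    // a directory counts as empty when it is absent or has no children.
    private static boolean isDirectoryEmpty(FileSystem fs, Path dirPath) throws IOException {
      return !fs.exists(dirPath) || fs.listStatus(dirPath).length == 0;
    }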
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
index e00ebd6099f..be14227e4cc 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.backup.impl;
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
import static org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.logDirectoryStructure;
import static org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.setupBackupFolders;
+import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -134,7 +135,7 @@ public class TestBackupCommands extends TestBackupBase {
fs.mkdirs(backupWalDir);
long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
- setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of WALs folders
+ setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of WAL/bulkload-files folders
logDirectoryStructure(fs, backupWalDir, "Before cleanup:");
@@ -154,6 +155,7 @@ public class TestBackupCommands extends TestBackupBase {
private static void verifyCleanupOutcome(FileSystem fs, Path backupWalDir, long currentTime,
long cutoffTime) throws IOException {
Path walsDir = new Path(backupWalDir, WALS_DIR);
+ Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -161,11 +163,14 @@ public class TestBackupCommands extends TestBackupBase {
long dayTime = currentTime - (i * ONE_DAY_IN_MILLISECONDS);
String dayDir = dateFormat.format(new Date(dayTime));
Path walPath = new Path(walsDir, dayDir);
+ Path bulkPath = new Path(bulkLoadDir, dayDir);
if (dayTime + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
assertFalse("Old WAL dir should be deleted: " + walPath,
fs.exists(walPath));
+ assertFalse("Old BulkLoad dir should be deleted: " + bulkPath,
fs.exists(bulkPath));
} else {
assertTrue("Recent WAL dir should exist: " + walPath,
fs.exists(walPath));
+ assertTrue("Recent BulkLoad dir should exist: " + bulkPath,
fs.exists(bulkPath));
}
}
}
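
Editor's note: a quick boundary check on the predicate dayTime + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime used above. A bucket whose day ends just before the cutoff is deleted, while a bucket starting at the cutoff instant is kept (the timestamp below is an illustrative value, not one from the tests):

    long ONE_DAY = 24L * 60 * 60 * 1000;
    long cutoffTime = 1_000_000_000L;                // arbitrary example instant
    long lastFullDayStart = cutoffTime - ONE_DAY;    // this day ends at cutoffTime - 1
    assert lastFullDayStart + ONE_DAY - 1 < cutoffTime;   // -> deleted
    long cutoffDayStart = cutoffTime;                // day starting at the cutoff
    assert !(cutoffDayStart + ONE_DAY - 1 < cutoffTime);  // -> kept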