This is an automated email from the ASF dual-hosted git repository.

andor pushed a commit to branch HBASE-28957_rebased
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 5d815b82b5655a9f2aa9d39b9bcd5b601ba914cd
Author: vinayak hegde <[email protected]>
AuthorDate: Wed Aug 27 03:55:29 2025 +0530

    HBASE-29524 Handle bulk-loaded HFiles in delete and cleanup process (#7239)
    
    Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
    Reviewed by: Kota-SH <[email protected]>
---
 .../hadoop/hbase/backup/impl/BackupCommands.java      | 16 +++++++++++++++-
 .../hbase/backup/TestBackupDeleteWithCleanup.java     | 19 ++++++++++++++++---
 .../hadoop/hbase/backup/impl/TestBackupCommands.java  |  7 ++++++-
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 3ae97c487ef..0b02a5edd89 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -1004,6 +1004,7 @@ public final class BackupCommands {
           new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, 
conf, backupWalDir);
         FileSystem fs = manager.getBackupFs();
         Path walDir = manager.getWalsDir();
+        Path bulkloadDir = manager.getBulkLoadFilesDir();
 
         // Delete contents under WAL directory
         if (fs.exists(walDir)) {
@@ -1014,6 +1015,15 @@ public final class BackupCommands {
           System.out.println("Deleted all contents under WAL directory: " + 
walDir);
         }
 
+        // Delete contents under bulk load directory
+        if (fs.exists(bulkloadDir)) {
+          FileStatus[] bulkContents = fs.listStatus(bulkloadDir);
+          for (FileStatus item : bulkContents) {
+            fs.delete(item.getPath(), true); // recursive delete of each child
+          }
+          System.out.println("Deleted all contents under Bulk Load directory: " + bulkloadDir);
+        }
+
       } catch (IOException e) {
         System.out.println("WARNING: Failed to delete contents under backup directories: "
           + backupWalDir + ". Error: " + e.getMessage());
@@ -1022,7 +1032,7 @@ public final class BackupCommands {
     }
 
     /**
-     * Cleans up old WAL files based on the determined cutoff timestamp.
+     * Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
      */
     void deleteOldWALFiles(Configuration conf, String backupWalDir, long 
cutoffTime)
       throws IOException {
@@ -1033,6 +1043,7 @@ public final class BackupCommands {
         new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, 
backupWalDir);
       FileSystem fs = manager.getBackupFs();
       Path walDir = manager.getWalsDir();
+      Path bulkloadDir = manager.getBulkLoadFilesDir();
 
       SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
       dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -1058,6 +1069,9 @@ public final class BackupCommands {
           if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
             System.out.println("Deleting outdated WAL directory: " + dirPath);
             fs.delete(dirPath, true);
+            Path bulkloadPath = new Path(bulkloadDir, dirName);
+            System.out.println("Deleting corresponding bulk-load directory: " 
+ bulkloadPath);
+            fs.delete(bulkloadPath, true);
           }
         } catch (ParseException e) {
           System.out.println("WARNING: Failed to parse directory name '" + 
dirName
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
index d22f4c9cda9..07c9110072b 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.backup;
 
 import static 
org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
 import static 
org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
+import static 
org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
 import static 
org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
 import static 
org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
 import static 
org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -164,7 +165,7 @@ public class TestBackupDeleteWithCleanup extends 
TestBackupBase {
 
     // Step 6: Verify that the backup WAL directory is empty
     assertTrue("WAL backup directory should be empty after force delete",
-      isWalDirsEmpty(conf1, backupWalDir.toString()));
+      areWalAndBulkloadDirsEmpty(conf1, backupWalDir.toString()));
 
     // Step 7: Take new full backup with continuous backup enabled
     String backupIdContinuous = 
fullTableBackupWithContinuous(Lists.newArrayList(table1));
@@ -189,28 +190,35 @@ public class TestBackupDeleteWithCleanup extends 
TestBackupBase {
   public static void setupBackupFolders(FileSystem fs, Path backupWalDir, long 
currentTime)
     throws IOException {
     Path walsDir = new Path(backupWalDir, WALS_DIR);
+    Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
 
     fs.mkdirs(walsDir);
+    fs.mkdirs(bulkLoadDir);
 
     SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
 
     for (int i = 0; i < 5; i++) {
       String dateStr = dateFormat.format(new Date(currentTime - (i * 
ONE_DAY_IN_MILLISECONDS)));
       fs.mkdirs(new Path(walsDir, dateStr));
+      fs.mkdirs(new Path(bulkLoadDir, dateStr));
     }
   }
 
   private static void verifyBackupCleanup(FileSystem fs, Path backupWalDir, 
long currentTime)
     throws IOException {
     Path walsDir = new Path(backupWalDir, WALS_DIR);
+    Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
     SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
 
     // Expect folders older than 3 days to be deleted
     for (int i = 3; i < 5; i++) {
       String oldDateStr = dateFormat.format(new Date(currentTime - (i * 
ONE_DAY_IN_MILLISECONDS)));
       Path walPath = new Path(walsDir, oldDateStr);
+      Path bulkLoadPath = new Path(bulkLoadDir, oldDateStr);
       assertFalse("Old WAL directory (" + walPath + ") should be deleted, but 
it exists!",
         fs.exists(walPath));
+      assertFalse("Old BulkLoad directory (" + bulkLoadPath + ") should be deleted, but it exists!",
+        fs.exists(bulkLoadPath));
     }
 
     // Expect folders within the last 3 days to exist
@@ -218,9 +226,13 @@ public class TestBackupDeleteWithCleanup extends 
TestBackupBase {
       String recentDateStr =
         dateFormat.format(new Date(currentTime - (i * 
ONE_DAY_IN_MILLISECONDS)));
       Path walPath = new Path(walsDir, recentDateStr);
+      Path bulkLoadPath = new Path(bulkLoadDir, recentDateStr);
 
       assertTrue("Recent WAL directory (" + walPath + ") should exist, but it 
is missing!",
         fs.exists(walPath));
+      assertTrue(
+        "Recent BulkLoad directory (" + bulkLoadPath + ") should exist, but it is missing!",
+        fs.exists(bulkLoadPath));
     }
   }
 
@@ -264,15 +276,16 @@ public class TestBackupDeleteWithCleanup extends 
TestBackupBase {
       peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER) && 
peer.isEnabled());
   }
 
-  private static boolean isWalDirsEmpty(Configuration conf, String 
backupWalDir)
+  private static boolean areWalAndBulkloadDirsEmpty(Configuration conf, String 
backupWalDir)
     throws IOException {
     BackupFileSystemManager manager =
       new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, 
backupWalDir);
 
     FileSystem fs = manager.getBackupFs();
     Path walDir = manager.getWalsDir();
+    Path bulkloadDir = manager.getBulkLoadFilesDir();
 
-    return isDirectoryEmpty(fs, walDir);
+    return isDirectoryEmpty(fs, walDir) && isDirectoryEmpty(fs, bulkloadDir);
   }
 
   private static boolean isDirectoryEmpty(FileSystem fs, Path dirPath) throws 
IOException {
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
index e00ebd6099f..be14227e4cc 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/impl/TestBackupCommands.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.backup.impl;
 import static 
org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
 import static 
org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.logDirectoryStructure;
 import static 
org.apache.hadoop.hbase.backup.TestBackupDeleteWithCleanup.setupBackupFolders;
+import static 
org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
 import static 
org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
 import static 
org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
 import static 
org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
@@ -134,7 +135,7 @@ public class TestBackupCommands extends TestBackupBase {
     fs.mkdirs(backupWalDir);
 
     long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
-    setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of 
WALs folders
+    setupBackupFolders(fs, backupWalDir, currentTime); // Create 5 days of 
WAL/bulkload-files folder
 
     logDirectoryStructure(fs, backupWalDir, "Before cleanup:");
 
@@ -154,6 +155,7 @@ public class TestBackupCommands extends TestBackupBase {
   private static void verifyCleanupOutcome(FileSystem fs, Path backupWalDir, 
long currentTime,
     long cutoffTime) throws IOException {
     Path walsDir = new Path(backupWalDir, WALS_DIR);
+    Path bulkLoadDir = new Path(backupWalDir, BULKLOAD_FILES_DIR);
     SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
     dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
 
@@ -161,11 +163,14 @@ public class TestBackupCommands extends TestBackupBase {
       long dayTime = currentTime - (i * ONE_DAY_IN_MILLISECONDS);
       String dayDir = dateFormat.format(new Date(dayTime));
       Path walPath = new Path(walsDir, dayDir);
+      Path bulkPath = new Path(bulkLoadDir, dayDir);
 
       if (dayTime + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
         assertFalse("Old WAL dir should be deleted: " + walPath, 
fs.exists(walPath));
+        assertFalse("Old BulkLoad dir should be deleted: " + bulkPath, 
fs.exists(bulkPath));
       } else {
         assertTrue("Recent WAL dir should exist: " + walPath, 
fs.exists(walPath));
+        assertTrue("Recent BulkLoad dir should exist: " + bulkPath, 
fs.exists(bulkPath));
       }
     }
   }

Reply via email to