This is an automated email from the ASF dual-hosted git repository.

ndimiduk pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 543cc189f72 HBASE-29604 BackupHFileCleaner uses flawed time based check (#7360)
543cc189f72 is described below

commit 543cc189f7215c6e708cd9ef7cad8cee5da99c1d
Author: DieterDP <[email protected]>
AuthorDate: Fri Oct 10 11:56:56 2025 +0200

    HBASE-29604 BackupHFileCleaner uses flawed time based check (#7360)
    
    Adds javadoc noting that FileCleanerDelegate#getDeletableFiles is
    invoked concurrently and that implementations must be thread-safe.
    
    Fixes a potential thread-safety issue in BackupHFileCleaner: the class
    tracks timestamps to block the deletion of recently loaded HFiles that
    might still be needed for a backup. These timestamps were registered
    from inside the concurrently invoked getDeletableFiles method, so
    parallel calls within a single cleaner run could advance the threshold
    prematurely and allow recently added files to be deleted. The
    registration now happens in the postClean method, which runs exactly
    once per cleaner run, so recently loaded HFiles are reliably protected
    from deletion.
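
    For illustration, a condensed sketch of the old, flawed pattern (field
    and method names follow the previous code; the surrounding class and
    the other deletability checks are omitted):

        // Both fields were mutated inside getDeletableFiles, which the
        // cleaner chore invokes from multiple threads within one run.
        private long prevReadFromBackupTbl = 0;
        private long secondPrevReadFromBackupTbl = 0;

        public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
          // Two concurrent calls both run the next two lines, so the
          // threshold can jump to "now" in the middle of a run, and a file
          // bulk loaded moments ago stops being considered recent.
          secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
          prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
          return Iterables.filter(files,
            file -> file.getModificationTime() <= secondPrevReadFromBackupTbl);
        }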
    
    Signed-off-by: Nick Dimiduk <[email protected]>
---
 .../apache/hadoop/hbase/backup/BackupHFileCleaner.java  | 17 ++++++++++-------
 .../hadoop/hbase/backup/TestBackupHFileCleaner.java     | 13 ++++++++++---
 .../hbase/master/cleaner/BaseFileCleanerDelegate.java   |  4 ++++
 .../hbase/master/cleaner/FileCleanerDelegate.java       |  4 ++++
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
index c9a76bef289..bbbae2d631f 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java
@@ -52,10 +52,13 @@ public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abor
   private boolean stopped = false;
   private boolean aborted = false;
   private Connection connection;
-  // timestamp of most recent read from backup system table
-  private long prevReadFromBackupTbl = 0;
-  // timestamp of 2nd most recent read from backup system table
-  private long secondPrevReadFromBackupTbl = 0;
+  // timestamp of most recent completed cleaning run
+  private volatile long previousCleaningCompletionTimestamp = 0;
+
+  @Override
+  public void postClean() {
+    previousCleaningCompletionTimestamp = EnvironmentEdgeManager.currentTime();
+  }
 
   @Override
   public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
@@ -79,12 +82,12 @@ public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abor
       return Collections.emptyList();
     }
 
-    secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
-    prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
+    // Pin the threshold, we don't want the result to change depending on evaluation time.
+    final long recentFileThreshold = previousCleaningCompletionTimestamp;
 
     return Iterables.filter(files, file -> {
      // If the file is recent, be conservative and wait for one more scan of the bulk loads
-      if (file.getModificationTime() > secondPrevReadFromBackupTbl) {
+      if (file.getModificationTime() > recentFileThreshold) {
        LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString());
         return false;
       }
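
For illustration, a hypothetical sketch of the cleaner run that drives these
hooks (this is not the actual CleanerChore code; the class, pool, and batching
here are assumptions for the example, while preClean, getDeletableFiles, and
postClean are the real delegate hooks):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Future;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hbase.master.cleaner.FileCleanerDelegate;

    class CleanerRunSketch {
      void runCleanerOnce(FileCleanerDelegate delegate, ExecutorService pool,
          List<List<FileStatus>> batches) throws InterruptedException {
        delegate.preClean();
        List<Future<?>> results = new ArrayList<>();
        for (List<FileStatus> batch : batches) {
          // getDeletableFiles may run on several pool threads at once.
          results.add(pool.submit(() -> delegate.getDeletableFiles(batch)));
        }
        for (Future<?> f : results) {
          try {
            f.get(); // wait until every concurrent evaluation of this run is done
          } catch (ExecutionException e) {
            // a real chore would log and continue; ignored in this sketch
          }
        }
        // Runs once, after all concurrent calls have finished: a timestamp
        // written here marks the completion of the whole run, which is what
        // BackupHFileCleaner needs for its "recent file" threshold.
        delegate.postClean();
      }
    }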
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
index bfc729aa679..3c5cff9fa63 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java
@@ -108,11 +108,11 @@ public class TestBackupHFileCleaner {
     Iterable<FileStatus> deletable;
 
     // The first call will not allow any deletions because of the timestamp mechanism.
-    deletable = cleaner.getDeletableFiles(Arrays.asList(file1, file1Archived, file2, file3));
+    deletable = callCleaner(cleaner, Arrays.asList(file1, file1Archived, file2, file3));
     assertEquals(Collections.emptySet(), Sets.newHashSet(deletable));
 
     // No bulk loads registered, so all files can be deleted.
-    deletable = cleaner.getDeletableFiles(Arrays.asList(file1, file1Archived, file2, file3));
+    deletable = callCleaner(cleaner, Arrays.asList(file1, file1Archived, file2, file3));
     assertEquals(Sets.newHashSet(file1, file1Archived, file2, file3), Sets.newHashSet(deletable));
 
     // Register some bulk loads.
@@ -125,10 +125,17 @@ public class TestBackupHFileCleaner {
     }
 
     // File 1 can no longer be deleted, because it is registered as a bulk load.
-    deletable = cleaner.getDeletableFiles(Arrays.asList(file1, file1Archived, file2, file3));
+    deletable = callCleaner(cleaner, Arrays.asList(file1, file1Archived, file2, file3));
     assertEquals(Sets.newHashSet(file2, file3), Sets.newHashSet(deletable));
   }
 
+  private Iterable<FileStatus> callCleaner(BackupHFileCleaner cleaner, Iterable<FileStatus> files) {
+    cleaner.preClean();
+    Iterable<FileStatus> deletable = cleaner.getDeletableFiles(files);
+    cleaner.postClean();
+    return deletable;
+  }
+
   private FileStatus createFile(String fileName) throws IOException {
     Path file = new Path(root, fileName);
     fs.createNewFile(file);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java
index 4c24ba1f81c..700914f07b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java
@@ -44,6 +44,10 @@ public abstract class BaseFileCleanerDelegate extends BaseConfigurable
 
   /**
    * Should the master delete the file or keep it?
+   * <p>
+   * This method can be called concurrently by multiple threads. Implementations must be thread
+   * safe.
+   * </p>
    * @param fStat file status of the file to check
    * @return <tt>true</tt> if the file is deletable, <tt>false</tt> if not
    */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
index d37bb620273..438f34a891c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java
@@ -33,6 +33,10 @@ public interface FileCleanerDelegate extends Configurable, Stoppable {
 
   /**
    * Determines which of the given files are safe to delete
+   * <p>
+   * This method can be called concurrently by multiple threads. Implementations must be thread
+   * safe.
+   * </p>
    * @param files files to check for deletion
    * @return files that are ok to delete according to this cleaner
    */
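
To illustrate the contract documented in both javadoc additions above, a
minimal hypothetical delegate that stays thread-safe by confining all mutable
state to volatile fields and writing the timestamp only from the
single-threaded postClean hook (the class name and its retention policy are
assumptions for the example, not part of this change):

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hbase.master.cleaner.BaseFileCleanerDelegate;
    import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

    public class RecentFileProtectingCleanerSketch extends BaseFileCleanerDelegate {
      // Written only from postClean (one thread, once per run); read from the
      // concurrently invoked isFileDeletable. volatile gives safe publication.
      private volatile long lastRunCompletionTs = 0;
      private volatile boolean stopped = false;

      @Override
      protected boolean isFileDeletable(FileStatus fStat) {
        // Pure read of shared state, so concurrent calls are safe: keep any
        // file modified after the last fully completed cleaner run.
        return fStat.getModificationTime() <= lastRunCompletionTs;
      }

      @Override
      public void postClean() {
        lastRunCompletionTs = EnvironmentEdgeManager.currentTime();
      }

      @Override
      public void stop(String why) {
        stopped = true;
      }

      @Override
      public boolean isStopped() {
        return stopped;
      }
    }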
