From 64436d408d9866c2294856d0b1b8d1d9bffd8d49 Mon Sep 17 00:00:00 2001 From: DieterDP <90392398+DieterDP-ng@users.noreply.github.com> Date: Fri, 10 Oct 2025 11:56:56 +0200 Subject: [PATCH] HBASE-29604 BackupHFileCleaner uses flawed time based check (#7360) Adds javadoc mentioning the concurrent usage and thread-safety need of FileCleanerDelegate#getDeletableFiles. Fixes a potential thread-safety issue in BackupHFileCleaner: this class tracks timestamps to block the deletion of recently loaded HFiles that might be needed for backup purposes. The timestamps were being registered from inside the concurrent method, which could result in recently added files getting deleted. Moved the timestamp registration to the postClean method, which is called only a single time per cleaner run, so recently loaded HFiles are in fact protected from deletion. Signed-off-by: Nick Dimiduk --- .../hadoop/hbase/backup/BackupHFileCleaner.java | 17 ++++++++++------- .../hbase/backup/TestBackupHFileCleaner.java | 13 ++++++++++--- .../master/cleaner/BaseFileCleanerDelegate.java | 4 ++++ .../master/cleaner/FileCleanerDelegate.java | 4 ++++ 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java index c9a76bef2891..bbbae2d631fe 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java @@ -52,10 +52,13 @@ public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abor private boolean stopped = false; private boolean aborted = false; private Connection connection; - // timestamp of most recent read from backup system table - private long prevReadFromBackupTbl = 0; - // timestamp of 2nd most recent read from backup system table - private long secondPrevReadFromBackupTbl = 0; + // timestamp of most recent completed cleaning run + private volatile long previousCleaningCompletionTimestamp = 0; + + @Override + public void postClean() { + previousCleaningCompletionTimestamp = EnvironmentEdgeManager.currentTime(); + } @Override public Iterable getDeletableFiles(Iterable files) { @@ -79,12 +82,12 @@ public Iterable getDeletableFiles(Iterable files) { return Collections.emptyList(); } - secondPrevReadFromBackupTbl = prevReadFromBackupTbl; - prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime(); + // Pin the threshold, we don't want the result to change depending on evaluation time. + final long recentFileThreshold = previousCleaningCompletionTimestamp; return Iterables.filter(files, file -> { // If the file is recent, be conservative and wait for one more scan of the bulk loads - if (file.getModificationTime() > secondPrevReadFromBackupTbl) { + if (file.getModificationTime() > recentFileThreshold) { LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString()); return false; } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java index 9989748746cb..ef72b994c773 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupHFileCleaner.java @@ -108,11 +108,11 @@ protected Set fetchFullyBackedUpTables(BackupSystemTable tbl) { Iterable deletable; // The first call will not allow any deletions because of the timestamp mechanism. - deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3)); + deletable = callCleaner(cleaner, List.of(file1, file1Archived, file2, file3)); assertEquals(Set.of(), Sets.newHashSet(deletable)); // No bulk loads registered, so all files can be deleted. - deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3)); + deletable = callCleaner(cleaner, List.of(file1, file1Archived, file2, file3)); assertEquals(Set.of(file1, file1Archived, file2, file3), Sets.newHashSet(deletable)); // Register some bulk loads. @@ -125,10 +125,17 @@ protected Set fetchFullyBackedUpTables(BackupSystemTable tbl) { } // File 1 can no longer be deleted, because it is registered as a bulk load. - deletable = cleaner.getDeletableFiles(List.of(file1, file1Archived, file2, file3)); + deletable = callCleaner(cleaner, List.of(file1, file1Archived, file2, file3)); assertEquals(Set.of(file2, file3), Sets.newHashSet(deletable)); } + private Iterable callCleaner(BackupHFileCleaner cleaner, Iterable files) { + cleaner.preClean(); + Iterable deletable = cleaner.getDeletableFiles(files); + cleaner.postClean(); + return deletable; + } + private FileStatus createFile(String fileName) throws IOException { Path file = new Path(root, fileName); fs.createNewFile(file); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java index 4c24ba1f81c5..700914f07b90 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/BaseFileCleanerDelegate.java @@ -44,6 +44,10 @@ public void init(Map params) { /** * Should the master delete the file or keep it? + *

+ * This method can be called concurrently by multiple threads. Implementations must be thread + * safe. + *

* @param fStat file status of the file to check * @return true if the file is deletable, false if not */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java index e08f53294336..714aaffaa052 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/FileCleanerDelegate.java @@ -33,6 +33,10 @@ public interface FileCleanerDelegate extends Configurable, Stoppable { /** * Determines which of the given files are safe to delete + *

+ * This method can be called concurrently by multiple threads. Implementations must be thread + * safe. + *

* @param files files to check for deletion * @return files that are ok to delete according to this cleaner */