From 5e7cb4d804d6ecb81d3b21a96b1d955b4d1a7e66 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Tue, 24 Sep 2024 14:15:25 -0400 Subject: [PATCH 1/4] HBASE-28882 Backup restores are broken if the backup has moved locations --- .../hbase/backup/HBackupFileSystem.java | 23 ++++++++- .../hbase/backup/impl/BackupManifest.java | 23 ++++++++- .../hbase/backup/TestHBackupFileSystem.java | 48 +++++++++++++++++++ .../hbase/backup/TestIncrementalBackup.java | 24 +++++++++- 4 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestHBackupFileSystem.java diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java index 2b27e7527477..c167dd814a00 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.backup; +import com.google.errorprone.annotations.RestrictedApi; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -27,6 +28,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; + /** * View to an on-disk Backup Image FileSytem Provides the set of methods necessary to interact with * the on-disk Backup Image data. @@ -97,10 +100,18 @@ public static Path getTableBackupPath(TableName tableName, Path backupRootPath, private static Path getManifestPath(Configuration conf, Path backupRootPath, String backupId) throws IOException { + return getManifestPath(conf, backupRootPath, backupId, true); + } + + /* Visible for testing only */ + @RestrictedApi(explanation = "Should only be called internally or in tests", link = "", + allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/HBackupFileSystem.java)") + static Path getManifestPath(Configuration conf, Path backupRootPath, String backupId, + boolean throwIfNotFound) throws IOException { FileSystem fs = backupRootPath.getFileSystem(conf); Path manifestPath = new Path(getBackupPath(backupRootPath.toString(), backupId) + Path.SEPARATOR + BackupManifest.MANIFEST_FILE_NAME); - if (!fs.exists(manifestPath)) { + if (throwIfNotFound && !fs.exists(manifestPath)) { String errorMsg = "Could not find backup manifest " + BackupManifest.MANIFEST_FILE_NAME + " for " + backupId + ". File " + manifestPath + " does not exists. Did " + backupId + " correspond to previously taken backup ?"; @@ -109,6 +120,16 @@ private static Path getManifestPath(Configuration conf, Path backupRootPath, Str return manifestPath; } + public static Path getRootDirFromBackupPath(Path backupPath, String backupId) { + if (backupPath.getName().equals(BackupManifest.MANIFEST_FILE_NAME)) { + backupPath = backupPath.getParent(); + } + Preconditions.checkArgument(backupPath.getName().equals(backupId), + String.format("Backup path %s must end in backupId %s", backupPath, backupId)); + return new Path( + backupPath.toString().substring(0, backupPath.toString().indexOf("/" + backupId))); + } + public static BackupManifest getManifest(Configuration conf, Path backupRootPath, String backupId) throws IOException { BackupManifest manifest = diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java index d66b5886794c..6035e16a1d5e 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.backup.impl; +import com.google.errorprone.annotations.RestrictedApi; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -439,7 +440,7 @@ public BackupManifest(FileSystem fs, Path backupPath) throws BackupException { } catch (Exception e) { throw new BackupException(e); } - this.backupImage = BackupImage.fromProto(proto); + this.backupImage = hydrateRootDir(BackupImage.fromProto(proto), backupPath); LOG.debug("Loaded manifest instance from manifest file: " + BackupUtils.getPath(subFile.getPath())); return; @@ -452,6 +453,26 @@ public BackupManifest(FileSystem fs, Path backupPath) throws BackupException { } } + /* Visible for testing only */ + @RestrictedApi(explanation = "Should only be called internally or in tests", link = "", + allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/impl/BackupManifest.java)") + public static BackupImage hydrateRootDir(BackupImage backupImage, Path backupPath) + throws IOException { + String providedRootDir = + HBackupFileSystem.getRootDirFromBackupPath(backupPath, backupImage.backupId).toString(); + if (!providedRootDir.startsWith(backupImage.getRootDir())) { + LOG.info( + "Provided backup root ({}) is different from the BackupImage's root ({}). " + + "We will prefer the provided root for this Backup and its ancestors.", + providedRootDir, backupImage.getRootDir()); + backupImage.setRootDir(providedRootDir); + for (BackupImage ancestor : backupImage.getAncestors()) { + ancestor.setRootDir(providedRootDir); + } + } + return backupImage; + } + public BackupType getType() { return backupImage.getType(); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestHBackupFileSystem.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestHBackupFileSystem.java new file mode 100644 index 000000000000..3bdc2f620794 --- /dev/null +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestHBackupFileSystem.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.backup; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestHBackupFileSystem { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestHBackupFileSystem.class); + + private static final Path ROOT_DIR = new Path("/backup/root"); + private static final String BACKUP_ID = "123"; + + @Test + public void testRootDirManifestPathConversion() throws IOException { + Path manifestPath = + HBackupFileSystem.getManifestPath(new Configuration(), ROOT_DIR, BACKUP_ID, false); + Path convertedRootDir = HBackupFileSystem.getRootDirFromBackupPath(manifestPath, BACKUP_ID); + assertEquals(ROOT_DIR, convertedRootDir); + } +} diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java index b8beeaca143e..ca41a3714cf2 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java @@ -106,7 +106,10 @@ public void TestIncBackupRestore() throws Exception { insertIntoTable(conn, table1, mobName, 3, NB_ROWS_FAM3).close(); Admin admin = conn.getAdmin(); BackupAdminImpl client = new BackupAdminImpl(conn); + BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR); String backupIdFull = takeFullBackup(tables, client); + validateRootPathCanBeOverridden(BACKUP_ROOT_DIR, backupIdFull); + assertTrue(checkSucceeded(backupIdFull)); // #2 - insert some data to table Table t1 = insertIntoTable(conn, table1, famName, 1, ADD_ROWS); @@ -150,12 +153,13 @@ public void TestIncBackupRestore() throws Exception { // #3 - incremental backup for multiple tables tables = Lists.newArrayList(table1, table2); - BackupRequest request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); + request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); String backupIdIncMultiple = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple)); BackupManifest manifest = HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), backupIdIncMultiple); assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList())); + validateRootPathCanBeOverridden(BACKUP_ROOT_DIR, backupIdIncMultiple); // add column family f2 to table1 // drop column family f3 @@ -183,6 +187,7 @@ public void TestIncBackupRestore() throws Exception { request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); String backupIdIncMultiple2 = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple2)); + validateRootPathCanBeOverridden(BACKUP_ROOT_DIR, backupIdIncMultiple2); // #5 - restore full backup for all tables TableName[] tablesRestoreFull = new TableName[] { table1, table2 }; @@ -249,4 +254,21 @@ private String takeFullBackup(List tables, BackupAdminImpl backupAdmi checkSucceeded(backupId); return backupId; } + + /** + * Check that backup manifest can be produced for a different root. Users may want to move + * existing backups to a different location. + */ + private void validateRootPathCanBeOverridden(String originalPath, String backupId) + throws IOException { + String anotherRootDir = "/some/other/root/dir"; + Path anotherPath = new Path(anotherRootDir, backupId); + BackupManifest.BackupImage differentLocationImage = BackupManifest.hydrateRootDir( + HBackupFileSystem.getManifest(conf1, new Path(originalPath), backupId).getBackupImage(), + anotherPath); + assertEquals(differentLocationImage.getRootDir(), anotherRootDir); + for (BackupManifest.BackupImage ancestor : differentLocationImage.getAncestors()) { + assertEquals(ancestor.getRootDir(), anotherRootDir); + } + } } From 91871a4a9839d2c43d7734c2dad6bb9201b0ef0c Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Wed, 9 Oct 2024 12:22:10 -0400 Subject: [PATCH 2/4] Don't store BackupImage root dir in backup --- .../hadoop/hbase/backup/BackupInfo.java | 2 +- .../hbase/backup/HBackupFileSystem.java | 3 +-- .../hbase/backup/impl/BackupManifest.java | 19 +++++++++---------- .../hbase/backup/TestIncrementalBackup.java | 2 +- .../src/main/protobuf/Backup.proto | 3 ++- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java index 5a0094740d63..f0dc10b83619 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java @@ -267,7 +267,7 @@ public String getFailedMsg() { } public void setFailedMsg(String failedMsg) { - if (failedMsg.length() > MAX_FAILED_MESSAGE_LENGTH) { + if (failedMsg != null && failedMsg.length() > MAX_FAILED_MESSAGE_LENGTH) { failedMsg = failedMsg.substring(0, MAX_FAILED_MESSAGE_LENGTH); } this.failedMsg = failedMsg; diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java index c167dd814a00..d5fd9aaf4c34 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java @@ -126,8 +126,7 @@ public static Path getRootDirFromBackupPath(Path backupPath, String backupId) { } Preconditions.checkArgument(backupPath.getName().equals(backupId), String.format("Backup path %s must end in backupId %s", backupPath, backupId)); - return new Path( - backupPath.toString().substring(0, backupPath.toString().indexOf("/" + backupId))); + return backupPath.getParent(); } public static BackupManifest getManifest(Configuration conf, Path backupRootPath, String backupId) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java index 6035e16a1d5e..59ae3857f2ec 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java @@ -159,12 +159,17 @@ static BackupImage fromProto(BackupProtos.BackupImage im) { return image; } + /** + * This method deliberately does not include the backup root dir on the produced proto. This is + * because we don't want to persist the root dir on the backup itself, so that backups can still + * be used after they have moved locations. A restore's operator will always provide the root + * dir. + */ BackupProtos.BackupImage toProto() { BackupProtos.BackupImage.Builder builder = BackupProtos.BackupImage.newBuilder(); builder.setBackupId(backupId); builder.setCompleteTs(completeTs); builder.setStartTs(startTs); - builder.setBackupRootDir(rootDir); if (type == BackupType.FULL) { builder.setBackupType(BackupProtos.BackupType.FULL); } else { @@ -460,15 +465,9 @@ public static BackupImage hydrateRootDir(BackupImage backupImage, Path backupPat throws IOException { String providedRootDir = HBackupFileSystem.getRootDirFromBackupPath(backupPath, backupImage.backupId).toString(); - if (!providedRootDir.startsWith(backupImage.getRootDir())) { - LOG.info( - "Provided backup root ({}) is different from the BackupImage's root ({}). " - + "We will prefer the provided root for this Backup and its ancestors.", - providedRootDir, backupImage.getRootDir()); - backupImage.setRootDir(providedRootDir); - for (BackupImage ancestor : backupImage.getAncestors()) { - ancestor.setRootDir(providedRootDir); - } + backupImage.setRootDir(providedRootDir); + for (BackupImage ancestor : backupImage.getAncestors()) { + ancestor.setRootDir(providedRootDir); } return backupImage; } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java index ca41a3714cf2..aed4f8ae9520 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java @@ -268,7 +268,7 @@ private void validateRootPathCanBeOverridden(String originalPath, String backupI anotherPath); assertEquals(differentLocationImage.getRootDir(), anotherRootDir); for (BackupManifest.BackupImage ancestor : differentLocationImage.getAncestors()) { - assertEquals(ancestor.getRootDir(), anotherRootDir); + assertEquals(anotherRootDir, ancestor.getRootDir()); } } } diff --git a/hbase-protocol-shaded/src/main/protobuf/Backup.proto b/hbase-protocol-shaded/src/main/protobuf/Backup.proto index afe43122f848..5851346900bc 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Backup.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Backup.proto @@ -54,11 +54,12 @@ message TableServerTimestamp { /** * Structure keeps relevant info for backup restore session + * backup_root_dir is deprecated because we no longer store root dirs on disk HBASE-28882 */ message BackupImage { optional string backup_id = 1; optional BackupType backup_type = 2; - optional string backup_root_dir = 3; + optional string backup_root_dir = 3 [deprecated = true]; repeated TableName table_list = 4; optional uint64 start_ts = 5; optional uint64 complete_ts = 6; From d70c8dd998427da8e164f73bb97ac8f0b542adae Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Wed, 13 Nov 2024 07:28:46 -0500 Subject: [PATCH 3/4] Deprecated javadoc --- hbase-protocol-shaded/src/main/protobuf/Backup.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-protocol-shaded/src/main/protobuf/Backup.proto b/hbase-protocol-shaded/src/main/protobuf/Backup.proto index 5851346900bc..a114001ba504 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Backup.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Backup.proto @@ -54,7 +54,7 @@ message TableServerTimestamp { /** * Structure keeps relevant info for backup restore session - * backup_root_dir is deprecated because we no longer store root dirs on disk HBASE-28882 + * backup_root_dir was marked as deprecated in HBase 2.6.2, will be removed in 4.0.0. */ message BackupImage { optional string backup_id = 1; From 3e6d98a7e31100d083a16331fddd0216e14ea3b7 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Fri, 15 Nov 2024 08:47:22 -0500 Subject: [PATCH 4/4] Copy backup manifest on merge --- .../hbase/backup/mapreduce/MapReduceBackupMergeJob.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java index 1f9ae16c1dfa..0549a3371cf5 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java @@ -228,6 +228,7 @@ protected void copyMetaData(FileSystem fs, Path tmpBackupDir, Path backupDirPath if ( fileName.indexOf(FSTableDescriptors.TABLEINFO_DIR) > 0 || fileName.indexOf(HRegionFileSystem.REGION_INFO_FILE) > 0 + || fileName.indexOf(BackupManifest.MANIFEST_FILE_NAME) > 0 ) { toKeep.add(p); } @@ -235,6 +236,7 @@ protected void copyMetaData(FileSystem fs, Path tmpBackupDir, Path backupDirPath // Copy meta to destination for (Path p : toKeep) { Path newPath = convertToDest(p, backupDirPath); + LOG.info("Copying tmp metadata from {} to {}", p, newPath); copyFile(fs, p, newPath); } } @@ -310,8 +312,11 @@ protected void updateBackupManifest(String backupRoot, String mergedBackupId, List backupsToDelete) throws IllegalArgumentException, IOException { BackupManifest manifest = HBackupFileSystem.getManifest(conf, new Path(backupRoot), mergedBackupId); + LOG.info("Removing ancestors from merged backup {} : {}", mergedBackupId, backupsToDelete); manifest.getBackupImage().removeAncestors(backupsToDelete); // save back + LOG.info("Creating new manifest file for merged backup {} at root {}", mergedBackupId, + backupRoot); manifest.store(conf); } @@ -320,12 +325,14 @@ protected void deleteBackupImages(List backupIds, Connection conn, FileS // Delete from backup system table try (BackupSystemTable table = new BackupSystemTable(conn)) { for (String backupId : backupIds) { + LOG.info("Removing metadata for backup {}", backupId); table.deleteBackupInfo(backupId); } } // Delete from file system for (String backupId : backupIds) { + LOG.info("Purging backup {} from FileSystem", backupId); Path backupDirPath = HBackupFileSystem.getBackupPath(backupRoot, backupId); if (!fs.delete(backupDirPath, true)) {