This is an automated email from the ASF dual-hosted git repository. ndimiduk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push: new d1fc87eb1c3 HBASE-28502 Cleanup old backup manifest logic (#5871) d1fc87eb1c3 is described below commit d1fc87eb1c3a36d46e18f84a6abf75f3106ef048 Author: DieterDP <90392398+dieterdp...@users.noreply.github.com> AuthorDate: Wed May 15 16:43:57 2024 +0200 HBASE-28502 Cleanup old backup manifest logic (#5871) In older versions of HBase's backup mechanism, a manifest was written per table being backed up. This was since refactored to one manifest per backup, but the manifest code was not updated. A concrete issue with the old code was that the manifest for full backups did not correctly list the tables included in the backup. Signed-off-by: Nick Dimiduk <ndimi...@apache.org> Reviewed-by: Ray Mattingly <rmdmattin...@gmail.com> --- .../hadoop/hbase/backup/HBackupFileSystem.java | 44 +--------------------- .../hadoop/hbase/backup/impl/BackupAdminImpl.java | 8 ++-- .../hadoop/hbase/backup/impl/BackupManifest.java | 2 +- .../hbase/backup/impl/RestoreTablesClient.java | 17 +++------ .../hbase/backup/impl/TableBackupClient.java | 42 ++++----------------- .../hadoop/hbase/backup/util/BackupUtils.java | 7 ++-- .../apache/hadoop/hbase/backup/TestFullBackup.java | 11 ++++++ .../hadoop/hbase/backup/TestIncrementalBackup.java | 8 ++++ 8 files changed, 42 insertions(+), 97 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java index c41a4a18243..2b27e752747 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java @@ -18,11 +18,9 @@ package org.apache.hadoop.hbase.backup; import java.io.IOException; -import java.util.HashMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.yetus.audience.InterfaceAudience; @@ -44,8 +42,8 @@ public final class HBackupFileSystem { } /** - * Given the backup root dir, backup id and the table name, return the backup image location, - * which is also where the backup manifest file is. return value look like: + * Given the backup root dir, backup id and the table name, return the backup image location. + * Return value look like: * "hdfs://backup.hbase.org:9000/user/biadmin/backup/backup_1396650096738/default/t1_dn/", where * "hdfs://backup.hbase.org:9000/user/biadmin/backup" is a backup root directory * @param backupRootDir backup root directory @@ -79,11 +77,6 @@ public final class HBackupFileSystem { return new Path(getBackupTmpDirPath(backupRoot), backupId); } - public static String getTableBackupDataDir(String backupRootDir, String backupId, - TableName tableName) { - return getTableBackupDir(backupRootDir, backupId, tableName) + Path.SEPARATOR + "data"; - } - public static Path getBackupPath(String backupRootDir, String backupId) { return new Path(backupRootDir + Path.SEPARATOR + backupId); } @@ -102,24 +95,6 @@ public final class HBackupFileSystem { return new Path(getTableBackupDir(backupRootPath.toString(), backupId, tableName)); } - /** - * Given the backup root dir and the backup id, return the log file location for an incremental - * backup. - * @param backupRootDir backup root directory - * @param backupId backup id - * @return logBackupDir: ".../user/biadmin/backup/WALs/backup_1396650096738" - */ - public static String getLogBackupDir(String backupRootDir, String backupId) { - return backupRootDir + Path.SEPARATOR + backupId + Path.SEPARATOR - + HConstants.HREGION_LOGDIR_NAME; - } - - public static Path getLogBackupPath(String backupRootDir, String backupId) { - return new Path(getLogBackupDir(backupRootDir, backupId)); - } - - // TODO we do not keep WAL files anymore - // Move manifest file to other place private static Path getManifestPath(Configuration conf, Path backupRootPath, String backupId) throws IOException { FileSystem fs = backupRootPath.getFileSystem(conf); @@ -140,19 +115,4 @@ public final class HBackupFileSystem { new BackupManifest(conf, getManifestPath(conf, backupRootPath, backupId)); return manifest; } - - /** - * Check whether the backup image path and there is manifest file in the path. - * @param backupManifestMap If all the manifests are found, then they are put into this map - * @param tableArray the tables involved - * @throws IOException exception - */ - public static void checkImageManifestExist(HashMap<TableName, BackupManifest> backupManifestMap, - TableName[] tableArray, Configuration conf, Path backupRootPath, String backupId) - throws IOException { - for (TableName tableName : tableArray) { - BackupManifest manifest = getManifest(conf, backupRootPath, backupId); - backupManifestMap.put(tableName, manifest); - } - } } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java index f580fb0c47b..69aef51a4ed 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.backup.impl; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -498,16 +499,15 @@ public class BackupAdminImpl implements BackupAdmin { @Override public void restore(RestoreRequest request) throws IOException { if (request.isCheck()) { - HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>(); // check and load backup image manifest for the tables Path rootPath = new Path(request.getBackupRootDir()); String backupId = request.getBackupId(); TableName[] sTableArray = request.getFromTables(); - HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, - conn.getConfiguration(), rootPath, backupId); + BackupManifest manifest = + HBackupFileSystem.getManifest(conn.getConfiguration(), rootPath, backupId); // Check and validate the backup image and its dependencies - if (BackupUtils.validate(backupManifestMap, conn.getConfiguration())) { + if (BackupUtils.validate(Arrays.asList(sTableArray), manifest, conn.getConfiguration())) { LOG.info(CHECK_OK); } else { LOG.error(CHECK_FAILED); diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java index 3a1cbd55c58..237d8686ab7 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java @@ -465,7 +465,7 @@ public class BackupManifest { } /** - * TODO: fix it. Persist the manifest file. + * Persist the manifest file. * @throws BackupException if an error occurred while storing the manifest file. */ public void store(Configuration conf) throws BackupException { diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java index 654fe343e27..0c3c5b40ffb 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java @@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CON import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.TreeSet; import org.apache.commons.lang3.StringUtils; @@ -204,19 +203,17 @@ public class RestoreTablesClient { /** * Restore operation. Stage 2: resolved Backup Image dependency - * @param backupManifestMap : tableName, Manifest - * @param sTableArray The array of tables to be restored - * @param tTableArray The array of mapping tables to restore to + * @param sTableArray The array of tables to be restored + * @param tTableArray The array of mapping tables to restore to * @throws IOException exception */ - private void restore(HashMap<TableName, BackupManifest> backupManifestMap, - TableName[] sTableArray, TableName[] tTableArray, boolean isOverwrite) throws IOException { + private void restore(BackupManifest manifest, TableName[] sTableArray, TableName[] tTableArray, + boolean isOverwrite) throws IOException { TreeSet<BackupImage> restoreImageSet = new TreeSet<>(); for (int i = 0; i < sTableArray.length; i++) { TableName table = sTableArray[i]; - BackupManifest manifest = backupManifestMap.get(table); // Get the image list of this backup for restore in time order from old // to new. List<BackupImage> list = new ArrayList<>(); @@ -256,12 +253,10 @@ public class RestoreTablesClient { checkTargetTables(tTableArray, isOverwrite); // case RESTORE_IMAGES: - HashMap<TableName, BackupManifest> backupManifestMap = new HashMap<>(); // check and load backup image manifest for the tables Path rootPath = new Path(backupRootDir); - HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, conf, rootPath, - backupId); + BackupManifest manifest = HBackupFileSystem.getManifest(conf, rootPath, backupId); - restore(backupManifestMap, sTableArray, tTableArray, isOverwrite); + restore(manifest, sTableArray, tTableArray, isOverwrite); } } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java index 60dbc6470a7..e758ced3f84 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.backup.impl; import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -268,7 +267,7 @@ public abstract class TableBackupClient { } /** - * Add manifest for the current backup. The manifest is stored within the table backup directory. + * Creates a manifest based on the provided info, and store it in the backup-specific directory. * @param backupInfo The current backup info * @throws IOException exception */ @@ -277,43 +276,16 @@ public abstract class TableBackupClient { // set the overall backup phase : store manifest backupInfo.setPhase(BackupPhase.STORE_MANIFEST); - BackupManifest manifest; - - // Since we have each table's backup in its own directory structure, - // we'll store its manifest with the table directory. - for (TableName table : backupInfo.getTables()) { - manifest = new BackupManifest(backupInfo, table); - ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo, table); - for (BackupImage image : ancestors) { - manifest.addDependentImage(image); - } - - if (type == BackupType.INCREMENTAL) { - // We'll store the log timestamps for this table only in its manifest. - Map<TableName, Map<String, Long>> tableTimestampMap = new HashMap<>(); - tableTimestampMap.put(table, backupInfo.getIncrTimestampMap().get(table)); - manifest.setIncrTimestampMap(tableTimestampMap); - ArrayList<BackupImage> ancestorss = backupManager.getAncestors(backupInfo); - for (BackupImage image : ancestorss) { - manifest.addDependentImage(image); - } - } - manifest.store(conf); - } - - // For incremental backup, we store a overall manifest in - // <backup-root-dir>/WALs/<backup-id> - // This is used when created the next incremental backup + BackupManifest manifest = new BackupManifest(backupInfo); if (type == BackupType.INCREMENTAL) { - manifest = new BackupManifest(backupInfo); // set the table region server start and end timestamps for incremental backup manifest.setIncrTimestampMap(backupInfo.getIncrTimestampMap()); - ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo); - for (BackupImage image : ancestors) { - manifest.addDependentImage(image); - } - manifest.store(conf); } + ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo); + for (BackupImage image : ancestors) { + manifest.addDependentImage(image); + } + manifest.store(conf); } /** diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java index d0a04960779..aa87a2f3d40 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java @@ -664,15 +664,14 @@ public final class BackupUtils { return request; } - public static boolean validate(HashMap<TableName, BackupManifest> backupManifestMap, + public static boolean validate(List<TableName> tables, BackupManifest backupManifest, Configuration conf) throws IOException { boolean isValid = true; - for (Entry<TableName, BackupManifest> manifestEntry : backupManifestMap.entrySet()) { - TableName table = manifestEntry.getKey(); + for (TableName table : tables) { TreeSet<BackupImage> imageSet = new TreeSet<>(); - ArrayList<BackupImage> depList = manifestEntry.getValue().getDependentListByTable(table); + ArrayList<BackupImage> depList = backupManifest.getDependentListByTable(table); if (depList != null && !depList.isEmpty()) { imageSet.addAll(depList); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java index 7cec0679974..ba09817fcde 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java @@ -17,10 +17,14 @@ */ package org.apache.hadoop.hbase.backup; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.util.HashSet; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.hadoop.hbase.backup.impl.BackupSystemTable; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.util.ToolRunner; @@ -30,6 +34,8 @@ import org.junit.experimental.categories.Category; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; + @Category(LargeTests.class) public class TestFullBackup extends TestBackupBase { @@ -56,6 +62,11 @@ public class TestFullBackup extends TestBackupBase { String backupId = data.getBackupId(); assertTrue(checkSucceeded(backupId)); } + + BackupInfo newestBackup = backups.get(0); + BackupManifest manifest = + HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), newestBackup.getBackupId()); + assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList())); } LOG.info("backup complete"); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java index 90fbba2bf0a..86966ddfd6e 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java @@ -17,16 +17,20 @@ */ package org.apache.hadoop.hbase.backup; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.SingleProcessHBaseCluster; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl; +import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.hadoop.hbase.backup.util.BackupUtils; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; @@ -50,6 +54,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; @Category(LargeTests.class) @RunWith(Parameterized.class) @@ -148,6 +153,9 @@ public class TestIncrementalBackup extends TestBackupBase { request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); String backupIdIncMultiple = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple)); + BackupManifest manifest = + HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), backupIdIncMultiple); + assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList())); // add column family f2 to table1 // drop column family f3