HBASE-16314 Retry on table snapshot failure during full backup (Vladimir Rodionov)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e95cf479 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e95cf479 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e95cf479 Branch: refs/heads/HBASE-16961 Commit: e95cf479c7615ae160a6ba963cc7689f3b440efd Parents: a3b6f4a Author: tedyu <yuzhih...@gmail.com> Authored: Fri Apr 21 16:15:07 2017 -0700 Committer: tedyu <yuzhih...@gmail.com> Committed: Fri Apr 21 16:15:07 2017 -0700 ---------------------------------------------------------------------- .../hbase/backup/BackupRestoreConstants.java | 10 ++++++ .../backup/impl/FullTableBackupClient.java | 36 ++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e95cf479/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java index e46904b..d1ab246 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java @@ -37,6 +37,16 @@ public interface BackupRestoreConstants { public final static int BACKUP_SYSTEM_TTL_DEFAULT = HConstants.FOREVER; public final static String BACKUP_ENABLE_KEY = "hbase.backup.enable"; public final static boolean BACKUP_ENABLE_DEFAULT = false; + + + public static final String BACKUP_MAX_ATTEMPTS_KEY = "hbase.backup.attempts.max"; + public static final int DEFAULT_BACKUP_MAX_ATTEMPTS = 10; + + public static final String BACKUP_ATTEMPTS_PAUSE_MS_KEY = "hbase.backup.attempts.pause.ms"; + public static final int DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS = 10000; + + + /* * Drivers option list */ http://git-wip-us.apache.org/repos/asf/hbase/blob/e95cf479/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java index 77d1184..ee7a841 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java @@ -18,6 +18,11 @@ package org.apache.hadoop.hbase.backup.impl; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_ATTEMPTS_PAUSE_MS_KEY; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_MAX_ATTEMPTS_KEY; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS; +import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_MAX_ATTEMPTS; + import java.io.IOException; import java.util.HashMap; import java.util.List; @@ -148,8 +153,7 @@ public class FullTableBackupClient extends TableBackupClient { "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_" + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString(); - admin.snapshot(snapshotName, tableName); - + snapshotTable(admin, tableName, snapshotName); backupInfo.setSnapshotName(tableName, snapshotName); } @@ -186,4 +190,32 @@ public class FullTableBackupClient extends TableBackupClient { } + private void snapshotTable(Admin admin, TableName tableName, String snapshotName) + throws IOException { + + int maxAttempts = + conf.getInt(BACKUP_MAX_ATTEMPTS_KEY, DEFAULT_BACKUP_MAX_ATTEMPTS); + int pause = + conf.getInt(BACKUP_ATTEMPTS_PAUSE_MS_KEY, DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS); + int attempts = 0; + + while (attempts++ < maxAttempts) { + try { + admin.snapshot(snapshotName, tableName); + return; + } catch (IOException ee) { + LOG.warn("Snapshot attempt " + attempts + " failed for table " + tableName + + ", sleeping for " + pause + "ms", ee); + if (attempts < maxAttempts) { + try { + Thread.sleep(pause); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + } + } + throw new IOException("Failed to snapshot table "+ tableName); + } }