HBASE-19980 NullPointerException when restoring a snapshot after splitting a region
Signed-off-by: tedyu <yuzhih...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d0f2d18c Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d0f2d18c Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d0f2d18c Branch: refs/heads/HBASE-19064 Commit: d0f2d18ca73737764550b319f749a51c876cca39 Parents: 8d26736 Author: Toshihiro Suzuki <brfrn...@gmail.com> Authored: Wed Feb 14 19:55:59 2018 +0900 Committer: tedyu <yuzhih...@gmail.com> Committed: Wed Feb 14 09:37:16 2018 -0800 ---------------------------------------------------------------------- .../hbase/snapshot/RestoreSnapshotHelper.java | 89 ++++++++++++-------- .../client/TestRestoreSnapshotFromClient.java | 20 +++++ 2 files changed, 73 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/d0f2d18c/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java index 404f8ff..c4f0e25 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java @@ -195,11 +195,33 @@ public class RestoreSnapshotHelper { // this instance, by removing the regions already present in the restore dir. Set<String> regionNames = new HashSet<>(regionManifests.keySet()); + List<RegionInfo> tableRegions = getTableRegions(); + RegionInfo mobRegion = MobUtils.getMobRegionInfo(snapshotManifest.getTableDescriptor() .getTableName()); + if (tableRegions != null) { + // restore the mob region in case + if (regionNames.contains(mobRegion.getEncodedName())) { + monitor.rethrowException(); + status.setStatus("Restoring mob region..."); + List<RegionInfo> mobRegions = new ArrayList<>(1); + mobRegions.add(mobRegion); + restoreHdfsMobRegions(exec, regionManifests, mobRegions); + regionNames.remove(mobRegion.getEncodedName()); + status.setStatus("Finished restoring mob region."); + } + } + if (regionNames.contains(mobRegion.getEncodedName())) { + // add the mob region + monitor.rethrowException(); + status.setStatus("Cloning mob region..."); + cloneHdfsMobRegion(regionManifests, mobRegion); + regionNames.remove(mobRegion.getEncodedName()); + status.setStatus("Finished cloning mob region."); + } + // Identify which region are still available and which not. // NOTE: we rely upon the region name as: "table name, start key, end key" - List<RegionInfo> tableRegions = getTableRegions(); if (tableRegions != null) { monitor.rethrowException(); for (RegionInfo regionInfo: tableRegions) { @@ -213,50 +235,40 @@ public class RestoreSnapshotHelper { metaChanges.addRegionToRemove(regionInfo); } } - - // Restore regions using the snapshot data - monitor.rethrowException(); - status.setStatus("Restoring table regions..."); - if (regionNames.contains(mobRegion.getEncodedName())) { - // restore the mob region in case - List<RegionInfo> mobRegions = new ArrayList<>(1); - mobRegions.add(mobRegion); - restoreHdfsMobRegions(exec, regionManifests, mobRegions); - regionNames.remove(mobRegion.getEncodedName()); - } - restoreHdfsRegions(exec, regionManifests, metaChanges.getRegionsToRestore()); - status.setStatus("Finished restoring all table regions."); - - // Remove regions from the current table - monitor.rethrowException(); - status.setStatus("Starting to delete excess regions from table"); - removeHdfsRegions(exec, metaChanges.getRegionsToRemove()); - status.setStatus("Finished deleting excess regions from table."); } // Regions to Add: present in the snapshot but not in the current table + List<RegionInfo> regionsToAdd = new ArrayList<>(regionNames.size()); if (regionNames.size() > 0) { - List<RegionInfo> regionsToAdd = new ArrayList<>(regionNames.size()); - monitor.rethrowException(); - // add the mob region - if (regionNames.contains(mobRegion.getEncodedName())) { - cloneHdfsMobRegion(regionManifests, mobRegion); - regionNames.remove(mobRegion.getEncodedName()); - } for (String regionName: regionNames) { LOG.info("region to add: " + regionName); - regionsToAdd.add(ProtobufUtil.toRegionInfo(regionManifests.get(regionName).getRegionInfo())); + regionsToAdd.add(ProtobufUtil.toRegionInfo(regionManifests.get(regionName) + .getRegionInfo())); } - - // Create new regions cloning from the snapshot - monitor.rethrowException(); - status.setStatus("Cloning regions..."); - RegionInfo[] clonedRegions = cloneHdfsRegions(exec, regionManifests, regionsToAdd); - metaChanges.setNewRegions(clonedRegions); - status.setStatus("Finished cloning regions."); } + // Create new regions cloning from the snapshot + // HBASE-19980: We need to call cloneHdfsRegions() before restoreHdfsRegions() because + // regionsMap is constructed in cloneHdfsRegions() and it can be used in restoreHdfsRegions(). + monitor.rethrowException(); + status.setStatus("Cloning regions..."); + RegionInfo[] clonedRegions = cloneHdfsRegions(exec, regionManifests, regionsToAdd); + metaChanges.setNewRegions(clonedRegions); + status.setStatus("Finished cloning regions."); + + // Restore regions using the snapshot data + monitor.rethrowException(); + status.setStatus("Restoring table regions..."); + restoreHdfsRegions(exec, regionManifests, metaChanges.getRegionsToRestore()); + status.setStatus("Finished restoring all table regions."); + + // Remove regions from the current table + monitor.rethrowException(); + status.setStatus("Starting to delete excess regions from table"); + removeHdfsRegions(exec, metaChanges.getRegionsToRemove()); + status.setStatus("Finished deleting excess regions from table."); + LOG.info("finishing restore table regions using snapshot=" + snapshotDesc); return metaChanges; @@ -742,11 +754,16 @@ public class RestoreSnapshotHelper { // Add the daughter region to the map String regionName = Bytes.toString(regionsMap.get(regionInfo.getEncodedNameAsBytes())); + if (regionName == null) { + regionName = regionInfo.getEncodedName(); + } LOG.debug("Restore reference " + regionName + " to " + clonedRegionName); synchronized (parentsMap) { Pair<String, String> daughters = parentsMap.get(clonedRegionName); if (daughters == null) { - daughters = new Pair<>(regionName, null); + // In case one side of the split is already compacted, regionName is put as both first and + // second of Pair + daughters = new Pair<>(regionName, regionName); parentsMap.put(clonedRegionName, daughters); } else if (!regionName.equals(daughters.getFirst())) { daughters.setSecond(regionName); http://git-wip-us.apache.org/repos/asf/hbase/blob/d0f2d18c/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java index 2556bec..eb8b20e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java @@ -23,6 +23,7 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.util.HashSet; +import java.util.List; import java.util.Set; import org.apache.hadoop.conf.Configuration; @@ -295,6 +296,25 @@ public class TestRestoreSnapshotFromClient { } } + @Test + public void testRestoreSnapshotAfterSplittingRegions() throws IOException, InterruptedException { + List<RegionInfo> regionInfos = admin.getRegions(tableName); + RegionReplicaUtil.removeNonDefaultRegions(regionInfos); + + // Split the first region + splitRegion(regionInfos.get(0)); + + // Take a snapshot + admin.snapshot(snapshotName1, tableName); + + // Restore the snapshot + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName1); + admin.enableTable(tableName); + + verifyRowCount(TEST_UTIL, tableName, snapshot1Rows); + } + // ========================================================================== // Helpers // ==========================================================================