Repository: hbase Updated Branches: refs/heads/branch-1 b0c749874 -> 005fdac29
HBASE-11703 Meta region state could be corrupted Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/005fdac2 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/005fdac2 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/005fdac2 Branch: refs/heads/branch-1 Commit: 005fdac29bceb7b750cc75f51c6515d5f6b9951c Parents: b0c7498 Author: Jimmy Xiang <[email protected]> Authored: Thu Aug 7 20:58:17 2014 -0700 Committer: Jimmy Xiang <[email protected]> Committed: Tue Aug 12 11:49:57 2014 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/master/RegionStates.java | 2 +- .../master/handler/ServerShutdownHandler.java | 3 -- .../master/TestAssignmentManagerOnCluster.java | 47 +++++++++++++++++++- 3 files changed, 47 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/005fdac2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 67eda4a..ecb6df7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -534,7 +534,7 @@ public class RegionStates { ServerName oldServerName = regionAssignments.remove(hri); if (oldServerName != null && serverHoldings.containsKey(oldServerName) && (newState == State.MERGED || newState == State.SPLIT - || tableStateManager.isTableState(hri.getTable(), + || hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(), ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING))) { // Offline the region only if it's merged/split, or the table is disabled/disabling. // Otherwise, offline it from this server only when it is online on a different server. http://git-wip-us.apache.org/repos/asf/hbase/blob/005fdac2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 607d042..11a4f10 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -168,9 +168,6 @@ public class ServerShutdownHandler extends EventHandler { } else { // Not using ZK for assignment, regionStates has everything we want hris = am.getRegionStates().getServerRegions(serverName); - if (hris != null) { - hris.remove(HRegionInfo.FIRST_META_REGIONINFO); - } } } break; http://git-wip-us.apache.org/repos/asf/hbase/blob/005fdac2/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index db2751d..ffc24e4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -49,7 +50,6 @@ import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.client.Admin; -import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; @@ -112,6 +112,51 @@ public class TestAssignmentManagerOnCluster { } /** + * This tests restarting meta regionserver + */ + @Test (timeout=180000) + public void testRestartMetaRegionServer() throws Exception { + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + boolean stoppedARegionServer = false; + try { + HMaster master = cluster.getMaster(); + RegionStates regionStates = master.getAssignmentManager().getRegionStates(); + ServerName metaServerName = regionStates.getRegionServerOfRegion( + HRegionInfo.FIRST_META_REGIONINFO); + if (master.getServerName().equals(metaServerName)) { + // Move meta off master + metaServerName = cluster.getLiveRegionServerThreads() + .get(0).getRegionServer().getServerName(); + master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), + Bytes.toBytes(metaServerName.getServerName())); + master.assignmentManager.waitUntilNoRegionsInTransition(60000); + } + assertNotEquals("Meta should be moved off master", + metaServerName, master.getServerName()); + cluster.killRegionServer(metaServerName); + stoppedARegionServer = true; + cluster.waitForRegionServerToStop(metaServerName, 60000); + + // Wait for SSH to finish + final ServerManager serverManager = master.getServerManager(); + TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { + @Override + public boolean evaluate() throws Exception { + return !serverManager.areDeadServersInProgress(); + } + }); + + // Now, make sure meta is assigned + assertTrue("Meta should be assigned", + regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); + } finally { + if (stoppedARegionServer) { + cluster.startRegionServer(); + } + } + } + + /** * This tests region assignment */ @Test (timeout=60000)
