HBASE-12480 Regions in FAILED_OPEN/FAILED_CLOSE should be processed on master failover


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/4ff74274
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/4ff74274
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/4ff74274

Branch: refs/heads/branch-1
Commit: 4ff742742be53d8c6a08fb4ce37bd80f2988abac
Parents: 908779b
Author: Virag Kothari <vi...@yahoo-inc.com>
Authored: Tue Jan 13 11:06:24 2015 -0800
Committer: Virag Kothari <vi...@yahoo-inc.com>
Committed: Tue Jan 13 11:06:24 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java  | 26 ++++++++----
 .../hadoop/hbase/master/TestMasterFailover.java | 42 ++++++++++++++++++--
 2 files changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/4ff74274/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index e39adc8..262ffee 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -550,8 +550,9 @@ public class AssignmentManager extends ZooKeeperListener {
       if (!regionsInTransition.isEmpty()) {
         Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
         for (RegionState regionState: regionsInTransition.values()) {
+          ServerName serverName = regionState.getServerName();
           if (!regionState.getRegion().isMetaRegion()
-              && onlineServers.contains(regionState.getServerName())) {
+              && serverName != null && onlineServers.contains(serverName)) {
             LOG.debug("Found " + regionState + " in RITs");
             failover = true;
             break;
@@ -2986,15 +2987,22 @@ public class AssignmentManager extends ZooKeeperListener {
     // the state after the RPC call. Otherwise, we don't know what's happened
     // to the region if the master dies right after the RPC call is out.
     Map<String, RegionState> rits = regionStates.getRegionsInTransition();
-    for (RegionState regionState: rits.values()) {
-      if (!serverManager.isServerOnline(regionState.getServerName())) {
-        continue; // SSH will handle it
-      }
-      State state = regionState.getState();
+    for (RegionState regionState : rits.values()) {
       LOG.info("Processing " + regionState);
+      ServerName serverName = regionState.getServerName();
+      // Server could be null in case of FAILED_OPEN when master cannot find a region plan. In that
+      // case, try assigning it here.
+      if (serverName != null
+          && !serverManager.getOnlineServers().containsKey(serverName)) {
+        LOG.info("Server " + serverName + " isn't online. SSH will handle this");
+        continue; 
+      }
+      HRegionInfo regionInfo = regionState.getRegion();
+      State state = regionState.getState();
+      
       switch (state) {
       case CLOSED:
-        invokeAssign(regionState.getRegion());
+        invokeAssign(regionInfo);
         break;
       case PENDING_OPEN:
         retrySendRegionOpen(regionState);
@@ -3002,6 +3010,10 @@ public class AssignmentManager extends ZooKeeperListener {
       case PENDING_CLOSE:
         retrySendRegionClose(regionState);
         break;
+      case FAILED_CLOSE:
+      case FAILED_OPEN:  
+        invokeUnAssign(regionInfo);
+        break;
       default:
         // No process for other states
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/4ff74274/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index 26e46c6..8ae26a3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -1064,8 +1064,8 @@ public class TestMasterFailover {
     RegionState newState = regionStates.getRegionState(hri);
     assertTrue(newState.isOpened());
   }
-
-  /**
+  
+ /**
    * Simple test of master failover.
    * <p>
    * Starts with three masters.  Kills a backup master.  Then kills the active
@@ -1176,7 +1176,7 @@ public class TestMasterFailover {
   }
 
   /**
-   * Test region in pending_open/close when master failover
+   * Test region in pending_open/close and failed_open/close when master failover
    */
   @Test (timeout=180000)
   @SuppressWarnings("deprecation")
@@ -1246,6 +1246,37 @@ public class TestMasterFailover {
     oldState = new RegionState(hriOffline, State.OFFLINE);
     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
+    createRegion(failedClose, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
+    
+    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
+    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+   
+    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
+    createRegion(failedOpen, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
+    
+    // Simulate a region transitioning to failed open when the region server reports the
+    // transition as FAILED_OPEN
+    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
+    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
+    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
+    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
+    
+    // Simulate a region transitioning to failed open when the master couldn't find a plan for
+    // the region
+    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
+    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
+    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
+    
+    
 
     // Stop the master
     log("Aborting master");
@@ -1269,7 +1300,10 @@ public class TestMasterFailover {
     // Both pending_open (RPC sent/not yet) regions should be online
     assertTrue(regionStates.isRegionOnline(hriOffline));
     assertTrue(regionStates.isRegionOnline(hriOnline));
-
+    assertTrue(regionStates.isRegionOnline(failedClose));
+    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
+    assertTrue(regionStates.isRegionOnline(failedOpen));
+    
     log("Done with verification, shutting down cluster");
 
     // Done, shutdown the cluster

Reply via email to