Author: stack
Date: Tue Dec 20 06:46:17 2011
New Revision: 1221121

URL: http://svn.apache.org/viewvc?rev=1221121&view=rev
Log:
HBASE-5063 RegionServers fail to report to backup HMaster after primary goes down

Modified:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1221121&r1=1221120&r2=1221121&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Dec 20 06:46:17 2011
@@ -1738,21 +1738,25 @@ public class HRegionServer implements HR
   private ServerName getMaster() {
     ServerName masterServerName = null;
     long previousLogTime = 0;
-    while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) {
-      if (!keepLooping()) return null;
-      if (System.currentTimeMillis() > (previousLogTime+1000)){
+    HMasterRegionInterface master = null;
+    while (keepLooping() && master == null) {
+      masterServerName = this.masterAddressManager.getMasterAddress();
+      if (masterServerName == null) {
+        if (!keepLooping()) {
+          // give up with no connection.
+          LOG.debug("No master found and cluster is stopped; bailing out");
+          return null;
+        }
         LOG.debug("No master found; retry");
         previousLogTime = System.currentTimeMillis();
+
+        sleeper.sleep();
+        continue;
       }
-      try {
-        Thread.sleep(100);
-      } catch (InterruptedException ignored) {
-      }
-    }
-    InetSocketAddress isa =
-      new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
-    HMasterRegionInterface master = null;
-    while (keepLooping() && master == null) {
+
+      InetSocketAddress isa =
+        new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
+
       LOG.info("Attempting connect to Master server at " +
         this.masterAddressManager.getMasterAddress());
       try {

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1221121&r1=1221120&r2=1221121&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Tue Dec 20 06:46:17 2011
@@ -115,6 +115,9 @@ public class TestMasterFailover {
     }
     assertEquals(1, numActive);
     assertEquals(2, masterThreads.size());
+    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
+    LOG.info("Active master managing " + rsCount +  " regions servers");
+    assertEquals(3, rsCount);
 
     // kill the active master
     LOG.debug("\n\nStopping the active master\n");
@@ -127,8 +130,13 @@ public class TestMasterFailover {
     LOG.debug("\n\nVerifying backup master is now active\n");
     // should only have one master now
     assertEquals(1, masterThreads.size());
+
     // and he should be active
-    assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
+    HMaster active = masterThreads.get(0).getMaster();
+    int rss = active.getClusterStatus().getServersSize();
+    LOG.info("Active master managing " + rss +  " regions servers");
+    assertTrue(active.isActiveMaster());
+    assertEquals(3, rss);
 
     // Stop the cluster
     TEST_UTIL.shutdownMiniCluster();


Reply via email to