HBASE-17718 Difference between RS's servername and its ephemeral node cause SSH stop working; AMENDMENT.
Make test tighter by extending ServerListener so can find when Master is in the waiting-on-regionservers state and making more assertions about state. Fix error where I would move on from waiting-on-regionservers if we had waited max time. Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6a57050c Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6a57050c Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6a57050c Branch: refs/heads/hbase-12439 Commit: 6a57050c24100438508199c9856b95be7024803a Parents: e239e8d Author: Michael Stack <st...@apache.org> Authored: Wed Mar 8 04:58:53 2017 -0800 Committer: Michael Stack <st...@apache.org> Committed: Wed Mar 8 08:14:24 2017 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/master/HMaster.java | 8 +- .../hadoop/hbase/master/ServerListener.java | 15 ++-- .../hadoop/hbase/master/ServerManager.java | 49 ++++++----- .../hbase/zookeeper/DrainingServerTracker.java | 22 ++--- .../TestRSKilledWhenInitializing.java | 88 ++++++++++++-------- 5 files changed, 103 insertions(+), 79 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/6a57050c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 501d3bd..a1cbe53 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -305,7 +305,7 @@ public class HMaster extends HRegionServer implements MasterServices { MemoryBoundedLogMessageBuffer rsFatals; // flag set after we become the active master (used for testing) - private volatile boolean isActiveMaster = false; + private volatile boolean activeMaster = false; // flag set after we complete initialization once active, // it is not private since it's used in unit tests @@ -597,7 +597,7 @@ public class HMaster extends HRegionServer implements MasterServices { @Override protected void waitForMasterActive(){ boolean tablesOnMaster = BaseLoadBalancer.tablesOnMaster(conf); - while (!(tablesOnMaster && isActiveMaster) + while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) { sleeper.sleep(); } @@ -733,7 +733,7 @@ public class HMaster extends HRegionServer implements MasterServices { private void finishActiveMasterInitialization(MonitoredTask status) throws IOException, InterruptedException, KeeperException, CoordinatedStateException { - isActiveMaster = true; + activeMaster = true; Thread zombieDetector = new Thread(new InitializationMonitor(this), "ActiveMasterInitializationMonitor-" + System.currentTimeMillis()); zombieDetector.start(); @@ -2555,7 +2555,7 @@ public class HMaster extends HRegionServer implements MasterServices { */ @Override public boolean isActiveMaster() { - return isActiveMaster; + return activeMaster; } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/6a57050c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerListener.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerListener.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerListener.java index f168686..7946735 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerListener.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerListener.java @@ -22,20 +22,25 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.ServerName; /** - * Get notification of server events. The invocations are inline - * so make sure your implementation is fast else you'll slow hbase. + * Get notification of server registration events. The invocations are inline + * so make sure your implementation is fast or else you'll slow hbase. */ @InterfaceAudience.Private public interface ServerListener { /** + * Started waiting on RegionServers to check-in. + */ + default void waiting() {}; + + /** * The server has joined the cluster. * @param serverName The remote servers name. */ - void serverAdded(final ServerName serverName); + default void serverAdded(final ServerName serverName) {}; /** * The server was removed from the cluster. * @param serverName The remote servers name. */ - void serverRemoved(final ServerName serverName); -} + default void serverRemoved(final ServerName serverName) {}; +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hbase/blob/6a57050c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index e6b60d8..db0a0e5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.RetriesExhaustedException; +import org.apache.hadoop.hbase.ipc.FailedServerException; import org.apache.hadoop.hbase.ipc.HBaseRpcController; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; @@ -772,7 +773,16 @@ public class ServerManager { */ private void checkForRSznode(final ServerName serverName, final ServiceException se) { if (se.getCause() == null) return; - if (!(se.getCause() instanceof ConnectException)) return; + Throwable t = se.getCause(); + if (t instanceof ConnectException) { + // If this, proceed to do cleanup. + } else { + // Look for FailedServerException + if (!(t instanceof IOException)) return; + if (t.getCause() == null) return; + if (!(t.getCause() instanceof FailedServerException)) return; + // Ok, found FailedServerException -- continue. + } if (!isServerOnline(serverName)) return; // We think this server is online. Check it has a znode up. Currently, a RS // registers an ephereral znode in zk. If not present, something is up. Maybe @@ -1030,20 +1040,19 @@ public class ServerManager { * * @throws InterruptedException */ - public void waitForRegionServers(MonitoredTask status) - throws InterruptedException { + public void waitForRegionServers(MonitoredTask status) throws InterruptedException { final long interval = this.master.getConfiguration(). - getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500); + getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500); final long timeout = this.master.getConfiguration(). - getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500); + getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500); // Min is not an absolute; just a friction making us wait longer on server checkin. int minToStart = getMinToStart(); int maxToStart = this.master.getConfiguration(). - getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE); + getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE); if (maxToStart < minToStart) { LOG.warn(String.format("The value of '%s' (%d) is set less than '%s' (%d), ignoring.", - WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart, - WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart)); + WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart, + WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart)); maxToStart = Integer.MAX_VALUE; } @@ -1060,19 +1069,19 @@ public class ServerManager { // Next, we will keep cycling if ANY of the following three conditions are true: // 1. The time since a regionserver registered is < interval (means servers are actively checking in). // 2. We are under the total timeout. - // 3. The count of servers is < minimum expected AND we are within timeout (this just puts up - // a little friction making us wait a bit longer if < minimum servers). + // 3. The count of servers is < minimum. + for (ServerListener listener: this.listeners) { + listener.waiting(); + } while (!this.master.isStopped() && count < maxToStart && - (((lastCountChange + interval) > now) || - (timeout > slept) || - ((count < minToStart) && (timeout > slept)))) { + ((lastCountChange + interval) > now || timeout > slept || count < minToStart)) { // Log some info at every interval time or if there is a change if (oldCount != count || lastLogTime + interval < now) { lastLogTime = now; String msg = - "Waiting for RegionServer count=" + count + " to settle; waited "+ - slept + "ms, expecting minimum=" + minToStart + "server(s) (max="+ getStrForMax(maxToStart) + "server(s)), " + - "timeout=" + timeout + "ms, lastChange=" + (lastCountChange - now) + "ms"; + "Waiting on RegionServer count=" + count + " to settle; waited="+ + slept + "ms, expecting min=" + minToStart + " server(s), max="+ getStrForMax(maxToStart) + + " server(s), " + "timeout=" + timeout + "ms, lastChange=" + (lastCountChange - now) + "ms"; LOG.info(msg); status.setStatus(msg); } @@ -1089,11 +1098,9 @@ public class ServerManager { lastCountChange = now; } } - - LOG.info("Finished waiting for RegionServer count=" + count + " to settle, slept for " + slept + "ms," + - " expecting minimum=" + minToStart + " server(s) (max=" + getStrForMax(maxToStart) + " server(s),"+ - " Master is "+ (this.master.isStopped() ? "stopped.": "running") - ); + LOG.info("Finished wait on RegionServer count=" + count + "; waited=" + slept + "ms," + + " expected min=" + minToStart + " server(s), max=" + getStrForMax(maxToStart) + " server(s),"+ + " master is "+ (this.master.isStopped() ? "stopped.": "running")); } private String getStrForMax(final int max) { http://git-wip-us.apache.org/repos/asf/hbase/blob/6a57050c/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java index 32e0862..a4880df 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/DrainingServerTracker.java @@ -70,22 +70,14 @@ public class DrainingServerTracker extends ZooKeeperListener { public void start() throws KeeperException, IOException { watcher.registerListener(this); // Add a ServerListener to check if a server is draining when it's added. - serverManager.registerListener( - new ServerListener() { - - @Override - public void serverAdded(ServerName sn) { - if (drainingServers.contains(sn)){ - serverManager.addServerToDrainList(sn); - } - } - - @Override - public void serverRemoved(ServerName serverName) { - // no-op - } + serverManager.registerListener(new ServerListener() { + @Override + public void serverAdded(ServerName sn) { + if (drainingServers.contains(sn)){ + serverManager.addServerToDrainList(sn); } - ); + } + }); List<String> servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.znodePaths.drainingZNode); add(servers); http://git-wip-us.apache.org/repos/asf/hbase/blob/6a57050c/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java index c01db2a..304bfe7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.regionserver; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.List; @@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.ServerListener; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -92,52 +94,57 @@ public class TestRSKilledWhenInitializing { RegisterAndDieRegionServer.class); final MasterThread master = startMaster(cluster.getMasters().get(0)); try { - masterActive.set(true); - // Now start regionservers. - // First RS to report for duty will kill itself when it gets a response. - // See below in the RegisterAndDieRegionServer handleReportForDutyResponse. + // Master is up waiting on RegionServers to check in. Now start RegionServers. for (int i = 0; i < NUM_RS; i++) { cluster.getRegionServers().get(i).start(); } - // Now wait on master to see NUM_RS + 1 servers as being online, NUM_RS and itself. - // Then wait until the killed RS gets removed from zk and triggers Master to remove - // it from list of online RS. - List<ServerName> onlineServersList = - master.getMaster().getServerManager().getOnlineServersList(); - while (onlineServersList.size() < NUM_RS + 1) { - // Spin till we see NUM_RS + Master in online servers list. + // Now wait on master to see NUM_RS + 1 servers as being online, thats NUM_RS plus + // the Master itself (because Master hosts hbase:meta and checks in as though it a RS). + List<ServerName> onlineServersList = null; + do { onlineServersList = master.getMaster().getServerManager().getOnlineServersList(); - } - LOG.info(onlineServersList); - assertEquals(NUM_RS + 1, onlineServersList.size()); - // Steady state. How many regions open? - // Wait until killedRS is set + } while (onlineServersList.size() < (NUM_RS + 1)); + // Wait until killedRS is set. Means RegionServer is starting to go down. while (killedRS.get() == null) { - Threads.sleep(10); + Threads.sleep(1); + } + // Wait on the RegionServer to fully die. + while (cluster.getLiveRegionServers().size() > NUM_RS) { + Threads.sleep(1); + } + // Make sure Master is fully up before progressing. Could take a while if regions + // being reassigned. + while (!master.getMaster().isInitialized()) { + Threads.sleep(1); } - final int regionsOpenCount = master.getMaster().getAssignmentManager().getNumRegionsOpened(); + + // Now in steady state. How many regions open? Master should have too many regionservers + // showing still. The downed RegionServer should still be showing as registered. + assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get())); // Find non-meta region (namespace?) and assign to the killed server. That'll trigger cleanup. - Map<HRegionInfo, ServerName> assigments = - master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments(); + Map<HRegionInfo, ServerName> assignments = null; + do { + assignments = master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments(); + } while (assignments == null || assignments.size() < 2); HRegionInfo hri = null; - for (Map.Entry<HRegionInfo, ServerName> e: assigments.entrySet()) { + for (Map.Entry<HRegionInfo, ServerName> e: assignments.entrySet()) { if (e.getKey().isMetaRegion()) continue; hri = e.getKey(); break; } // Try moving region to the killed server. It will fail. As by-product, we will // remove the RS from Master online list because no corresponding znode. + assertEquals(NUM_RS + 1, master.getMaster().getServerManager().getOnlineServersList().size()); LOG.info("Move " + hri.getEncodedName() + " to " + killedRS.get()); master.getMaster().move(hri.getEncodedNameAsBytes(), Bytes.toBytes(killedRS.get().toString())); - while (onlineServersList.size() > NUM_RS) { + // Wait until the RS no longer shows as registered in Master. + while (onlineServersList.size() > (NUM_RS + 1)) { Thread.sleep(100); onlineServersList = master.getMaster().getServerManager().getOnlineServersList(); } - // Just for kicks, ensure namespace was put back on the old server after above failed move. - assertEquals(regionsOpenCount, - master.getMaster().getAssignmentManager().getNumRegionsOpened()); } finally { + // Shutdown is messy with complaints about fs being closed. Why? TODO. cluster.shutdown(); cluster.join(); TEST_UTIL.shutdownMiniDFSCluster(); @@ -146,19 +153,32 @@ public class TestRSKilledWhenInitializing { } } + /** + * Start Master. Get as far as the state where Master is waiting on + * RegionServers to check in, then return. + */ private MasterThread startMaster(MasterThread master) { master.start(); - long startTime = System.currentTimeMillis(); - while (!master.getMaster().isInitialized()) { - try { - Thread.sleep(100); - } catch (InterruptedException ignored) { - LOG.info("Interrupted: ignoring"); - } - if (System.currentTimeMillis() > startTime + 30000) { - throw new RuntimeException("Master not active after 30 seconds"); + // It takes a while until ServerManager creation to happen inside Master startup. + while (master.getMaster().getServerManager() == null) { + continue; + } + // Set a listener for the waiting-on-RegionServers state. We want to wait + // until this condition before we leave this method and start regionservers. + final AtomicBoolean waiting = new AtomicBoolean(false); + if (master.getMaster().getServerManager() == null) throw new NullPointerException("SM"); + master.getMaster().getServerManager().registerListener(new ServerListener() { + @Override + public void waiting() { + waiting.set(true); } + }); + // Wait until the Master gets to place where it is waiting on RegionServers to check in. + while (!waiting.get()) { + continue; } + // Set the global master-is-active; gets picked up by regionservers later. + masterActive.set(true); return master; }