Repository: hadoop Updated Branches: refs/heads/trunk abae63caf -> a6aa6e42c
YARN-2588. Standby RM fails to transitionToActive if previous transitionToActive failed with ZK exception. Contributed by Rohith Sharmaks Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a6aa6e42 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a6aa6e42 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a6aa6e42 Branch: refs/heads/trunk Commit: a6aa6e42cacdbfcc1c2b7c19e7239204fe9ff654 Parents: abae63c Author: Jian He <[email protected]> Authored: Fri Oct 17 10:54:24 2014 -0700 Committer: Jian He <[email protected]> Committed: Fri Oct 17 10:54:24 2014 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 ++ .../server/resourcemanager/ResourceManager.java | 10 +++- .../yarn/server/resourcemanager/TestRMHA.java | 52 ++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a6aa6e42/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5056470..151210a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -677,6 +677,9 @@ Release 2.6.0 - UNRELEASED YARN-2566. DefaultContainerExecutor should pick a working directory randomly. (Zhihai Xu via kasha) + YARN-2588. Standby RM fails to transitionToActive if previous + transitionToActive failed with ZK exception. (Rohith Sharmaks via jianhe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/a6aa6e42/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 68cbc7c..bcf7a54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -1023,8 +1023,14 @@ public class ResourceManager extends CompositeService implements Recoverable { this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() { @Override public Void run() throws Exception { - startActiveServices(); - return null; + try { + startActiveServices(); + return null; + } catch (Exception e) { + resetDispatcher(); + createAndInitActiveServices(); + throw e; + } } }); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a6aa6e42/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java index 5a0e95f..e30ca29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -398,6 +399,57 @@ public class TestRMHA { innerTestHAWithRMHostName(true); } + @Test(timeout = 30000) + public void testFailoverWhenTransitionToActiveThrowException() + throws Exception { + configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); + Configuration conf = new YarnConfiguration(configuration); + + MemoryRMStateStore memStore = new MemoryRMStateStore() { + int count = 0; + + @Override + public synchronized void startInternal() throws Exception { + // first time throw exception + if (count++ == 0) { + throw new Exception("Session Expired"); + } + } + }; + // start RM + memStore.init(conf); + + rm = new MockRM(conf, memStore); + rm.init(conf); + StateChangeRequestInfo requestInfo = + new StateChangeRequestInfo( + HAServiceProtocol.RequestSource.REQUEST_BY_USER); + + assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService + .getServiceStatus().getState()); + assertFalse("RM is ready to become active before being started", + rm.adminService.getServiceStatus().isReadyToBecomeActive()); + checkMonitorHealth(); + + rm.start(); + checkMonitorHealth(); + checkStandbyRMFunctionality(); + + // 2. Try Transition to active, throw exception + try { + rm.adminService.transitionToActive(requestInfo); + Assert.fail("Transitioned to Active should throw exception."); + } catch (Exception e) { + assertTrue("Error when transitioning to Active mode".contains(e + .getMessage())); + } + + // 3. Transition to active, success + rm.adminService.transitionToActive(requestInfo); + checkMonitorHealth(); + checkActiveRMFunctionality(); + } + public void innerTestHAWithRMHostName(boolean includeBindHost) { //this is run two times, with and without a bind host configured if (includeBindHost) {
