(TWILL-142) Increase the timeout in EchoServerTestRun when checking whether the restart was successful.
Bump up the timeout for the stopwatch that waits until all containers have been restarted. With the new feature from TWILL-116 committed, the EchoServerTestRun test became a bit flaky because the mini cluster sometimes could not allocate new containers within the current timeout of 30s. This closes #56 on GitHub Signed-off-by: Terence Yim <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-twill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-twill/commit/ecec4b3c Tree: http://git-wip-us.apache.org/repos/asf/incubator-twill/tree/ecec4b3c Diff: http://git-wip-us.apache.org/repos/asf/incubator-twill/diff/ecec4b3c Branch: refs/heads/site Commit: ecec4b3cdd757cbd45eeb30360421f06eba0fecf Parents: 85a626a Author: hsaputra <[email protected]> Authored: Wed Jul 15 15:59:05 2015 -0700 Committer: Terence Yim <[email protected]> Committed: Thu Jul 16 15:52:30 2015 -0700 ---------------------------------------------------------------------- .../internal/appmaster/ApplicationMasterService.java | 2 +- .../java/org/apache/twill/yarn/EchoServerTestRun.java | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/ecec4b3c/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/ApplicationMasterService.java ---------------------------------------------------------------------- diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/ApplicationMasterService.java b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/ApplicationMasterService.java index f76cd0b..cbf013b 100644 --- a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/ApplicationMasterService.java +++ b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/ApplicationMasterService.java @@ -832,7 +832,7 @@ public final class ApplicationMasterService extends AbstractYarnTwillService imp for 
(Map.Entry<String, String> option : requestCommand.getOptions().entrySet()) { String runnableName = option.getKey(); Set<Integer> restartedInstanceIds = GSON.fromJson(option.getValue(), - new TypeToken<Set<Integer>>() {}.getType()); + new TypeToken<Set<Integer>>() {}.getType()); LOG.debug("Start restarting runnable {} instances {}", runnableName, restartedInstanceIds); restartRunnableInstances(runnableName, restartedInstanceIds); http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/ecec4b3c/twill-yarn/src/test/java/org/apache/twill/yarn/EchoServerTestRun.java ---------------------------------------------------------------------- diff --git a/twill-yarn/src/test/java/org/apache/twill/yarn/EchoServerTestRun.java b/twill-yarn/src/test/java/org/apache/twill/yarn/EchoServerTestRun.java index 0a8414e..3f0f20c 100644 --- a/twill-yarn/src/test/java/org/apache/twill/yarn/EchoServerTestRun.java +++ b/twill-yarn/src/test/java/org/apache/twill/yarn/EchoServerTestRun.java @@ -117,7 +117,8 @@ public final class EchoServerTestRun extends BaseYarnTest { // Test restart on instances for runnable Map<Integer, String> instanceIdToContainerId = Maps.newHashMap(); - ResourceReport report = waitForAfterRestartResourceReport(controller, "EchoServer", 30L, TimeUnit.SECONDS, 2, null); + ResourceReport report = waitForAfterRestartResourceReport(controller, "EchoServer", 15L, + TimeUnit.MINUTES, 2, null); Assert.assertTrue(report != null); Collection<TwillRunResources> runResources = report.getRunnableResources("EchoServer"); for (TwillRunResources twillRunResources : runResources) { @@ -127,7 +128,7 @@ public final class EchoServerTestRun extends BaseYarnTest { controller.restartAllInstances("EchoServer"); Assert.assertTrue(waitForSize(echoServices, 2, 120)); - report = waitForAfterRestartResourceReport(controller, "EchoServer", 30L, TimeUnit.SECONDS, 2, + report = waitForAfterRestartResourceReport(controller, "EchoServer", 15L, TimeUnit.MINUTES, 2, instanceIdToContainerId); 
Assert.assertTrue(report != null); @@ -181,6 +182,7 @@ public final class EchoServerTestRun extends BaseYarnTest { Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); } else { if (instanceIdToContainerId == null) { + LOG.info("Return resource report without comparing container ids."); return report; } Collection<TwillRunResources> runResources = report.getRunnableResources(runnable); @@ -189,17 +191,21 @@ public final class EchoServerTestRun extends BaseYarnTest { int instanceId = twillRunResources.getInstanceId(); if (twillRunResources.getContainerId().equals(instanceIdToContainerId.get(instanceId))) { // found same container id lets wait again. + LOG.warn("Found an instance id {} with same container id {} for restart all, let's wait for a while.", + instanceId, twillRunResources.getContainerId()); isSameContainer = true; break; } } if (!isSameContainer) { - LOG.error("Unable to get different container ids for restart."); + LOG.info("Get set of different container ids for restart."); return report; } Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); } } while (stopwatch.elapsedTime(timeoutUnit) < timeout); + + LOG.error("Unable to get different container ids for restart."); return null; } }
