Repository: flink Updated Branches: refs/heads/release-1.1 2203f743a -> c9433bf60
[FLINK-3706] Fix YARN test instability The most important change in this commit is that the `YarnTestBase.Runner` doesn't do "try {} catch (Throwable t) { fail(t); }" anymore; that pattern doesn't lead to a test failure, because it's called outside the main thread. With the change, all throwables are reported back to the main thread and fail the test there properly (many YARN tests benefit from this change). This closes #2622 Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/c9433bf6 Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/c9433bf6 Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/c9433bf6 Branch: refs/heads/release-1.1 Commit: c9433bf6073d8ad7a4892cb9f020105b6a7f8d72 Parents: 2203f74 Author: Robert Metzger <rmetz...@apache.org> Authored: Mon Oct 10 17:04:16 2016 +0200 Committer: Robert Metzger <rmetz...@apache.org> Committed: Wed Oct 12 12:00:40 2016 +0200 ---------------------------------------------------------------------- .../YARNSessionCapacitySchedulerITCase.java | 13 ++--- .../flink/yarn/YARNSessionFIFOITCase.java | 17 +------ .../org/apache/flink/yarn/YarnTestBase.java | 50 +++++++++++--------- 3 files changed, 36 insertions(+), 44 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/c9433bf6/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java ---------------------------------------------------------------------- diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java index 513a9fc..7f5143d 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java @@ -322,12 +322,13 @@ 
public class YARNSessionCapacitySchedulerITCase extends YarnTestBase { } /** - * Test deployment to non-existing queue. (user-reported error) - * Deployment to the queue is possible because there are no queues, so we don't check. + * Test deployment to non-existing queue & ensure that the system logs a WARN message + * for the user. (Users had unexpected behavior of Flink on YARN because they mistyped the + * target queue. With an error message, we can help users identifying the issue) */ @Test - public void testNonexistingQueue() { - LOG.info("Starting testNonexistingQueue()"); + public void testNonexistingQueueWARNmessage() { + LOG.info("Starting testNonexistingQueueWARNmessage()"); addTestAppender(YarnClusterDescriptor.class, Level.WARN); runWithArgs(new String[]{"-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), @@ -335,8 +336,8 @@ public class YARNSessionCapacitySchedulerITCase extends YarnTestBase { "-jm", "768", "-tm", "1024", "-qu", "doesntExist"}, "to unknown queue: doesntExist", null, RunTypes.YARN_SESSION, 1); - checkForLogString("The specified queue 'doesntExist' does not exist. Available queues: default, qa-team"); - LOG.info("Finished testNonexistingQueue()"); + checkForLogString("The specified queue 'doesntExist' does not exist. 
Available queues"); + LOG.info("Finished testNonexistingQueueWARNmessage()"); } /** http://git-wip-us.apache.org/repos/asf/flink/blob/c9433bf6/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java ---------------------------------------------------------------------- diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java index 8a2ad60..0a22a38 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java @@ -84,7 +84,7 @@ public class YARNSessionFIFOITCase extends YarnTestBase { public void testDetachedMode() { LOG.info("Starting testDetachedMode()"); addTestAppender(FlinkYarnSessionCli.class, Level.INFO); - Runner runner = startWithArgs(new String[]{"-j", flinkUberjar.getAbsolutePath(), + startWithArgs(new String[]{"-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", @@ -138,21 +138,6 @@ public class YARNSessionFIFOITCase extends YarnTestBase { LOG.info("Finished testQueryCluster()"); } - /** - * Test deployment to non-existing queue. (user-reported error) - * Deployment to the queue is possible because there are no queues, so we don't check. - */ - @Test - public void testNonexistingQueue() { - LOG.info("Starting testNonexistingQueue()"); - runWithArgs(new String[]{"-j", flinkUberjar.getAbsolutePath(), - "-t", flinkLibFolder.getAbsolutePath(), - "-n", "1", - "-jm", "768", - "-tm", "1024", - "-qu", "doesntExist"}, "Number of connected TaskManagers changed to 1. 
Slots available: 1", null, RunTypes.YARN_SESSION, 0); - LOG.info("Finished testNonexistingQueue()"); - } /** * The test cluster has the following resources: http://git-wip-us.apache.org/repos/asf/flink/blob/c9433bf6/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java ---------------------------------------------------------------------- diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java index 0243012..7e612c4 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java @@ -425,7 +425,7 @@ public abstract class YarnTestBase extends TestLogger { final int START_TIMEOUT_SECONDS = 60; - Runner runner = new Runner(args, type); + Runner runner = new Runner(args, type, 0); runner.setName("Frontend (CLI/YARN Client) runner thread (startWithArgs())."); runner.start(); @@ -440,7 +440,10 @@ public abstract class YarnTestBase extends TestLogger { // check if thread died if(!runner.isAlive()) { sendOutput(); - Assert.fail("Runner thread died before the test was finished. Return value = "+runner.getReturnValue()); + if(runner.getRunnerError() != null) { + throw new RuntimeException("Runner failed with exception.", runner.getRunnerError()); + } + Assert.fail("Runner thread died before the test was finished."); } } @@ -459,10 +462,10 @@ public abstract class YarnTestBase extends TestLogger { * @param terminateAfterString the runner is searching the stdout and stderr for this string. as soon as it appears, the test has passed * @param failOnPatterns The runner is searching stdout and stderr for the pattern (regexp) specified here. If one appears, the test has failed * @param type Set the type of the runner - * @param returnCode Expected return code from the runner. + * @param expectedReturnValue Expected return code from the runner. 
* @param checkLogForTerminateString If true, the runner checks also the log4j logger for the terminate string */ - protected void runWithArgs(String[] args, String terminateAfterString, String[] failOnPatterns, RunTypes type, int returnCode, boolean checkLogForTerminateString) { + protected void runWithArgs(String[] args, String terminateAfterString, String[] failOnPatterns, RunTypes type, int expectedReturnValue, boolean checkLogForTerminateString) { LOG.info("Running with args {}", Arrays.toString(args)); outContent = new ByteArrayOutputStream(); @@ -475,7 +478,7 @@ public abstract class YarnTestBase extends TestLogger { final int START_TIMEOUT_SECONDS = 180; final long deadline = System.currentTimeMillis() + (START_TIMEOUT_SECONDS * 1000); - Runner runner = new Runner(args, type); + Runner runner = new Runner(args, type, expectedReturnValue); runner.start(); boolean expectedStringSeen = false; @@ -524,25 +527,22 @@ public abstract class YarnTestBase extends TestLogger { else { // check if thread died if (!runner.isAlive()) { - if (runner.getReturnValue() != 0) { - Assert.fail("Runner thread died before the test was finished. Return value = " - + runner.getReturnValue()); - } else { - LOG.info("Runner stopped earlier than expected with return value = 0"); - } // leave loop: the runner died, so we can not expect new strings to show up. break; } } } - while (!expectedStringSeen && System.currentTimeMillis() < deadline); + while (runner.getRunnerError() == null && !expectedStringSeen && System.currentTimeMillis() < deadline); sendOutput(); + + if(runner.getRunnerError() != null) { + // this lets the test fail. 
+ throw new RuntimeException("Runner failed", runner.getRunnerError()); + } Assert.assertTrue("During the timeout period of " + START_TIMEOUT_SECONDS + " seconds the " + "expected string did not show up", expectedStringSeen); - // check for 0 return code - Assert.assertEquals("Expected return value", returnCode, runner.getReturnValue()); LOG.info("Test was successful"); } @@ -556,22 +556,22 @@ public abstract class YarnTestBase extends TestLogger { public static class Runner extends Thread { private final String[] args; - private int returnValue; + private final int expectedReturnValue; private RunTypes type; private FlinkYarnSessionCli yCli; + private Throwable runnerError; - public Runner(String[] args, RunTypes type) { + public Runner(String[] args, RunTypes type, int expectedReturnValue) { this.args = args; this.type = type; + this.expectedReturnValue = expectedReturnValue; } - public int getReturnValue() { - return returnValue; - } @Override public void run() { try { + int returnValue; switch (type) { case YARN_SESSION: yCli = new FlinkYarnSessionCli("", "", false); @@ -605,11 +605,13 @@ public abstract class YarnTestBase extends TestLogger { throw new RuntimeException("Unknown type " + type); } - if (returnValue != 0) { - Assert.fail("The YARN session returned with non-null value=" + returnValue); + if (returnValue != this.expectedReturnValue) { + Assert.fail("The YARN session returned with unexpected value=" + returnValue + " expected=" + expectedReturnValue); } } catch (Throwable t) { - Assert.fail(t.getMessage()); + LOG.info("Runner stopped with exception", t); + // save error. + this.runnerError = t; } } @@ -619,6 +621,10 @@ public abstract class YarnTestBase extends TestLogger { yCli.stop(); } } + + public Throwable getRunnerError() { + return runnerError; + } } // -------------------------- Tear down -------------------------- //