Repository: spark Updated Branches: refs/heads/master 1813c4a8d -> 628bdeabd
[SPARK-17742][CORE] Fail launcher app handle if child process exits with error. This is a follow up to cba826d0; that commit set the app handle state to "LOST" when the child process exited, but that can be ambiguous. This change sets the state to "FAILED" if the exit code was non-zero and the handle state wasn't a failure state, or "LOST" if the exit status was zero. Author: Marcelo Vanzin <van...@cloudera.com> Closes #19012 from vanzin/SPARK-17742. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/628bdeab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/628bdeab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/628bdeab Branch: refs/heads/master Commit: 628bdeabda3347d0903c9ac8748d37d7b379d1e6 Parents: 1813c4a Author: Marcelo Vanzin <van...@cloudera.com> Authored: Fri Aug 25 10:04:21 2017 -0700 Committer: Marcelo Vanzin <van...@cloudera.com> Committed: Fri Aug 25 10:04:21 2017 -0700 ---------------------------------------------------------------------- .../spark/launcher/ChildProcAppHandle.java | 27 +++++++++++++++----- .../spark/launcher/ChildProcAppHandleSuite.java | 21 ++++++++++++++- 2 files changed, 41 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/628bdeab/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java ---------------------------------------------------------------------- diff --git a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java index bf91640..5391d4a 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java +++ b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java @@ -156,9 +156,15 @@ class ChildProcAppHandle implements SparkAppHandle { * the exit code. */ void monitorChild() { - while (childProc.isAlive()) { + Process proc = childProc; + if (proc == null) { + // Process may have already been disposed of, e.g. by calling kill(). + return; + } + + while (proc.isAlive()) { try { - childProc.waitFor(); + proc.waitFor(); } catch (Exception e) { LOG.log(Level.WARNING, "Exception waiting for child process to exit.", e); } @@ -173,15 +179,24 @@ class ChildProcAppHandle implements SparkAppHandle { int ec; try { - ec = childProc.exitValue(); + ec = proc.exitValue(); } catch (Exception e) { LOG.log(Level.WARNING, "Exception getting child process exit code, assuming failure.", e); ec = 1; } - // Only override the success state; leave other fail states alone. - if (!state.isFinal() || (ec != 0 && state == State.FINISHED)) { - state = State.LOST; + State newState = null; + if (ec != 0) { + // Override state with failure if the current state is not final, or is success. + if (!state.isFinal() || state == State.FINISHED) { + newState = State.FAILED; + } + } else if (!state.isFinal()) { + newState = State.LOST; + } + + if (newState != null) { + state = newState; fireEvent(false); } } http://git-wip-us.apache.org/repos/asf/spark/blob/628bdeab/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java ---------------------------------------------------------------------- diff --git a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java index 602f55a..3b4d1b0 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java @@ -46,7 +46,9 @@ public class ChildProcAppHandleSuite extends BaseSuite { private static final List<String> TEST_SCRIPT = Arrays.asList( "#!/bin/sh", "echo \"output\"", - "echo \"error\" 1>&2"); + "echo \"error\" 1>&2", + "while [ -n \"$1\" ]; do EC=$1; shift; done", + "exit $EC"); private static File TEST_SCRIPT_PATH; @@ -176,6 +178,7 @@ public class ChildProcAppHandleSuite extends BaseSuite { @Test public void testProcMonitorWithOutputRedirection() throws Exception { + assumeFalse(isWindows()); File err = Files.createTempFile("out", "txt").toFile(); SparkAppHandle handle = new TestSparkLauncher() .redirectError() @@ -187,6 +190,7 @@ public class ChildProcAppHandleSuite extends BaseSuite { @Test public void testProcMonitorWithLogRedirection() throws Exception { + assumeFalse(isWindows()); SparkAppHandle handle = new TestSparkLauncher() .redirectToLog(getClass().getName()) .startApplication(); @@ -194,6 +198,16 @@ public class ChildProcAppHandleSuite extends BaseSuite { assertEquals(SparkAppHandle.State.LOST, handle.getState()); } + @Test + public void testFailedChildProc() throws Exception { + assumeFalse(isWindows()); + SparkAppHandle handle = new TestSparkLauncher(1) + .redirectToLog(getClass().getName()) + .startApplication(); + waitFor(handle); + assertEquals(SparkAppHandle.State.FAILED, handle.getState()); + } + private void waitFor(SparkAppHandle handle) throws Exception { long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10); try { @@ -212,7 +226,12 @@ public class ChildProcAppHandleSuite extends BaseSuite { private static class TestSparkLauncher extends SparkLauncher { TestSparkLauncher() { + this(0); + } + + TestSparkLauncher(int ec) { setAppResource("outputredirtest"); + addAppArgs(String.valueOf(ec)); } @Override --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org