Repository: hadoop Updated Branches: refs/heads/branch-2.8 5f059e03f -> 53ec7c924
YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana (cherry picked from commit 059caf99891943d9587cac19b48e82efbed06b2d) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/53ec7c92 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/53ec7c92 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/53ec7c92 Branch: refs/heads/branch-2.8 Commit: 53ec7c9243c1109f431f5dd03bf45b461dec3496 Parents: 5f059e0 Author: Jason Lowe <jl...@apache.org> Authored: Mon Mar 7 15:40:01 2016 +0000 Committer: Jason Lowe <jl...@apache.org> Committed: Mon Mar 7 15:48:06 2016 +0000 ---------------------------------------------------------------------- .../nodemanager/LinuxContainerExecutor.java | 4 ++-- .../linux/privileged/PrivilegedOperation.java | 23 +++++++++++++++++--- .../privileged/PrivilegedOperationExecutor.java | 21 ++++++++++-------- .../linux/resources/CGroupsHandlerImpl.java | 2 +- .../linux/resources/TrafficController.java | 2 +- .../runtime/DefaultLinuxContainerRuntime.java | 11 ++++++---- .../runtime/DockerLinuxContainerRuntime.java | 5 ++--- .../TestPrivilegedOperationExecutor.java | 4 ++-- .../linux/resources/TestCGroupsHandlerImpl.java | 2 +- 9 files changed, 48 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 8549230..d602796 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -166,7 +166,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { // verify configuration/permissions and exit try { PrivilegedOperation checkSetupOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.CHECK_SETUP, (String) null); + PrivilegedOperation.OperationType.CHECK_SETUP); PrivilegedOperationExecutor privilegedOperationExecutor = PrivilegedOperationExecutor.getInstance(conf); @@ -222,7 +222,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { verifyUsernamePattern(user); String runAsUser = getRunAsUser(user); PrivilegedOperation initializeContainerOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.INITIALIZE_CONTAINER, (String) null); + PrivilegedOperation.OperationType.INITIALIZE_CONTAINER); List<String> prefixCommands = new ArrayList<>(); addSchedPriorityCommand(prefixCommands); http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java index cbbf7a8..259dee8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java @@ -68,10 +68,16 @@ public class PrivilegedOperation { private final OperationType opType; private final List<String> args; + private boolean failureLogging; - public PrivilegedOperation(OperationType opType, String arg) { + public PrivilegedOperation(OperationType opType) { this.opType = opType; this.args = new ArrayList<String>(); + this.failureLogging = true; + } + + public PrivilegedOperation(OperationType opType, String arg) { + this(opType); if (arg != null) { this.args.add(arg); @@ -79,8 +85,7 @@ public class PrivilegedOperation { } public PrivilegedOperation(OperationType opType, List<String> args) { - this.opType = opType; - this.args = new ArrayList<String>(); + this(opType); if (args != null) { this.args.addAll(args); @@ -97,6 +102,18 @@ public class PrivilegedOperation { this.args.addAll(args); } + public void enableFailureLogging() { + this.failureLogging = true; + } + + public void disableFailureLogging() { + this.failureLogging = false; + } + + public boolean isFailureLoggingEnabled() { + return failureLogging; + } + public OperationType getOperationType() { return opType; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java index 4b1bb9f..7370daa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java @@ -155,17 +155,20 @@ public class PrivilegedOperationExecutor { LOG.debug(exec.getOutput()); } } catch (ExitCodeException e) { - StringBuilder logBuilder = new StringBuilder("Shell execution returned " - + "exit code: ") - .append(exec.getExitCode()) - .append(". Privileged Execution Operation Output: ") - .append(System.lineSeparator()).append(exec.getOutput()); + if (operation.isFailureLoggingEnabled()) { - logBuilder.append("Full command array for failed execution: ") - .append(System.lineSeparator()); - logBuilder.append(Arrays.toString(fullCommandArray)); + StringBuilder logBuilder = new StringBuilder("Shell execution returned " + + "exit code: ") + .append(exec.getExitCode()) + .append(". Privileged Execution Operation Output: ") + .append(System.lineSeparator()).append(exec.getOutput()); - LOG.warn(logBuilder.toString()); + logBuilder.append("Full command array for failed execution: ") + .append(System.lineSeparator()); + logBuilder.append(Arrays.toString(fullCommandArray)); + + LOG.warn(logBuilder.toString()); + } //stderr from shell executor seems to be stuffed into the exception //'message' - so, we have to extract it and set it as the error out http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java index 0d71a9d..02ce53f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java @@ -247,7 +247,7 @@ class CGroupsHandlerImpl implements CGroupsHandler { .append(controller.getName()).append('=').append(controllerPath); PrivilegedOperation.OperationType opType = PrivilegedOperation .OperationType.MOUNT_CGROUPS; - PrivilegedOperation op = new PrivilegedOperation(opType, (String) null); + PrivilegedOperation op = new PrivilegedOperation(opType); op.appendArgs(hierarchy, cGroupKV.toString()); LOG.info("Mounting controller " + controller.getName() + " at " + http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java index e33cea4..f1468fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java @@ -546,7 +546,7 @@ import java.util.regex.Pattern; case TC_MODIFY_STATE: case TC_READ_STATE: case TC_READ_STATS: - operation = new PrivilegedOperation(opType, (String) null); + operation = new PrivilegedOperation(opType); commands = new ArrayList<>(); break; default: http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java index 633fa66..3862b92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java @@ -67,7 +67,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime { throws ContainerExecutionException { Container container = ctx.getContainer(); PrivilegedOperation launchOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.LAUNCH_CONTAINER, (String) null); + PrivilegedOperation.OperationType.LAUNCH_CONTAINER); //All of these arguments are expected to be available in the runtime context launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), @@ -116,7 +116,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime { throws ContainerExecutionException { Container container = ctx.getContainer(); PrivilegedOperation signalOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null); + PrivilegedOperation.OperationType.SIGNAL_CONTAINER); signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), ctx.getExecutionAttribute(USER), @@ -125,6 +125,9 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime { ctx.getExecutionAttribute(PID), Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue())); + //Some failures here are acceptable. Let the calling executor decide. + signalOp.disableFailureLogging(); + try { PrivilegedOperationExecutor executor = PrivilegedOperationExecutor .getInstance(conf); @@ -133,8 +136,8 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime { signalOp, null, container.getLaunchContext().getEnvironment(), false); } catch (PrivilegedOperationException e) { - LOG.warn("Signal container failed. Exception: ", e); - + //Don't log the failure here. Some kinds of signaling failures are + // acceptable. Let the calling executor decide what to do. throw new ContainerExecutionException("Signal container failed", e .getExitCode(), e.getOutput(), e.getErrorOutput()); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 9ad04a8..089e6c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -285,8 +285,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { String commandFile = dockerClient.writeCommandToTempFile(runCommand, containerIdStr); PrivilegedOperation launchOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER, (String) - null); + PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER); launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER), Integer.toString(PrivilegedOperation @@ -326,7 +325,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { throws ContainerExecutionException { Container container = ctx.getContainer(); PrivilegedOperation signalOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null); + PrivilegedOperation.OperationType.SIGNAL_CONTAINER); signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), ctx.getExecutionAttribute(USER), http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java index 849dbab..7146412 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java @@ -69,7 +69,7 @@ public class TestPrivilegedOperationExecutor { cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks"; cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks"; opDisallowed = new PrivilegedOperation - (PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null); + (PrivilegedOperation.OperationType.DELETE_AS_USER); opTasksNone = new PrivilegedOperation (PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone); @@ -118,7 +118,7 @@ public class TestPrivilegedOperationExecutor { PrivilegedOperationExecutor exec = PrivilegedOperationExecutor .getInstance(confWithExecutorPath); PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation - .OperationType.TC_MODIFY_STATE, (String) null); + .OperationType.TC_MODIFY_STATE); String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op); //No arguments added - so the resulting array should consist of http://git-wip-us.apache.org/repos/asf/hadoop/blob/53ec7c92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java index 50f8da6..76d56b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java @@ -89,7 +89,7 @@ public class TestCGroupsHandlerImpl { cGroupsHandler = new CGroupsHandlerImpl(conf, privilegedOperationExecutorMock); PrivilegedOperation expectedOp = new PrivilegedOperation( - PrivilegedOperation.OperationType.MOUNT_CGROUPS, (String) null); + PrivilegedOperation.OperationType.MOUNT_CGROUPS); //This is expected to be of the form : //net_cls=<mount_path>/net_cls StringBuffer controllerKV = new StringBuffer(controller.getName())