YARN-8508. Release GPU resource for killed container.
Contributed by Chandni Singh
(cherry picked from commit ed9d60e888d0acfd748fda7f66249f5b79a3ed6d)
(cherry picked from commit c2c3eee69c8b389525fbde800e057dbcb2fc643e)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/17625e40
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/17625e40
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/17625e40
Branch: refs/heads/branch-3.1.1
Commit: 17625e40f61ac31fc87f4a74d66388dbe508d33d
Parents: 096c139
Author: Eric Yang <[email protected]>
Authored: Fri Jul 27 19:33:58 2018 -0400
Committer: Wangda Tan <[email protected]>
Committed: Tue Jul 31 12:06:54 2018 -0700
----------------------------------------------------------------------
.../nodemanager/LinuxContainerExecutor.java | 34 ++++++++++----------
.../nodemanager/TestLinuxContainerExecutor.java | 9 +++++-
2 files changed, 25 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/17625e40/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 03b88a4..4253f2f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -573,15 +573,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return handleExitCode(e, container, containerId);
} finally {
resourcesHandler.postExecute(containerId);
-
- try {
- if (resourceHandlerChain != null) {
- resourceHandlerChain.postComplete(containerId);
- }
- } catch (ResourceHandlerException e) {
- LOG.warn("ResourceHandlerChain.postComplete failed for " +
- "containerId: " + containerId + ". Exception: " + e);
- }
+ postComplete(containerId);
}
return 0;
@@ -721,14 +713,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return super.reacquireContainer(ctx);
} finally {
resourcesHandler.postExecute(containerId);
- if (resourceHandlerChain != null) {
- try {
- resourceHandlerChain.postComplete(containerId);
- } catch (ResourceHandlerException e) {
- LOG.warn("ResourceHandlerChain.postComplete failed for " +
- "containerId: " + containerId + " Exception: " + e);
- }
- }
+ postComplete(containerId);
}
}
@@ -798,6 +783,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
logOutput(e.getOutput());
throw new IOException("Error in reaping container "
+ container.getContainerId().toString() + " exit = " + retCode, e);
+ } finally {
+ postComplete(container.getContainerId());
}
return true;
}
@@ -968,4 +955,17 @@ public class LinuxContainerExecutor extends ContainerExecutor {
LOG.warn("Unable to remove docker container: " + containerId);
}
}
+
+ @VisibleForTesting
+ void postComplete(final ContainerId containerId) {
+ try {
+ if (resourceHandlerChain != null) {
+ LOG.debug("{} post complete", containerId);
+ resourceHandlerChain.postComplete(containerId);
+ }
+ } catch (ResourceHandlerException e) {
+ LOG.warn("ResourceHandlerChain.postComplete failed for " +
+ "containerId: {}. Exception: ", containerId, e);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/17625e40/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
index ddbf3b9..6d77fc4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
@@ -25,11 +25,14 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,6 +43,7 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.net.InetSocketAddress;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@@ -667,12 +671,15 @@ public class TestLinuxContainerExecutor {
@Test
public void testReapContainer() throws Exception {
Container container = mock(Container.class);
- LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
+ LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
+ LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
+ containerRuntime));
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
builder.setContainer(container).setUser("foo");
ContainerReapContext ctx = builder.build();
lce.reapContainer(ctx);
verify(lce, times(1)).reapContainer(ctx);
+ verify(lce, times(1)).postComplete(anyObject());
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]