YARN-2431. NM restart: cgroup is not removed for reacquired containers. 
Contributed by Jason Lowe


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3fa5f728
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3fa5f728
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3fa5f728

Branch: refs/heads/MR-2841
Commit: 3fa5f728c4d6944302621965d6a0376827af5b51
Parents: b44b2ee
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Sep 4 21:11:27 2014 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Sep 4 21:11:27 2014 +0000

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +
 .../nodemanager/LinuxContainerExecutor.java     | 11 ++++
 .../nodemanager/TestLinuxContainerExecutor.java | 60 ++++++++++++++++++++
 3 files changed, 74 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/3fa5f728/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index b0e4a01..f6f5c4b 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -278,6 +278,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2462. 
TestNodeManagerResync#testBlockNewContainerRequestsOnStartAndResync
     should have a test timeout (Eric Payne via jlowe)
 
+    YARN-2431. NM restart: cgroup is not removed for reacquired containers
+    (jlowe)
+
 Release 2.5.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3fa5f728/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 804864e..6b8dd28 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.nodemanager;
 
 import com.google.common.base.Optional;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
@@ -342,6 +343,16 @@ public class LinuxContainerExecutor extends 
ContainerExecutor {
   }
 
   @Override
+  public int reacquireContainer(String user, ContainerId containerId)
+      throws IOException {
+    try {
+      return super.reacquireContainer(user, containerId);
+    } finally {
+      resourcesHandler.postExecute(containerId);
+    }
+  }
+
+  @Override
   public boolean signalContainer(String user, String pid, Signal signal)
       throws IOException {
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3fa5f728/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
index a5ec43b..c02212e 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
@@ -30,6 +30,8 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
 
 import org.junit.Assert;
 import org.apache.commons.logging.Log;
@@ -42,11 +44,15 @@ import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -353,4 +359,58 @@ public class TestLinuxContainerExecutor {
     }
   }
 
+  @Test(timeout=10000)
+  public void testPostExecuteAfterReacquisition() throws Exception {
+    // make up some bogus container ID
+    ApplicationId appId = ApplicationId.newInstance(12345, 67890);
+    ApplicationAttemptId attemptId =
+        ApplicationAttemptId.newInstance(appId, 54321);
+    ContainerId cid = ContainerId.newInstance(attemptId, 9876);
+
+    Configuration conf = new YarnConfiguration();
+    conf.setClass(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER,
+        TestResourceHandler.class, LCEResourcesHandler.class);
+    LinuxContainerExecutor lce = new LinuxContainerExecutor();
+    lce.setConf(conf);
+    try {
+      lce.init();
+    } catch (IOException e) {
+      // expected if LCE isn't setup right, but not necessary for this test
+    }
+    lce.reacquireContainer("foouser", cid);
+    Assert.assertTrue("postExec not called after reacquisition",
+        TestResourceHandler.postExecContainers.contains(cid));
+  }
+
+  private static class TestResourceHandler implements LCEResourcesHandler {
+    static Set<ContainerId> postExecContainers = new HashSet<ContainerId>();
+
+    @Override
+    public void setConf(Configuration conf) {
+    }
+
+    @Override
+    public Configuration getConf() {
+      return null;
+    }
+
+    @Override
+    public void init(LinuxContainerExecutor lce) throws IOException {
+    }
+
+    @Override
+    public void preExecute(ContainerId containerId, Resource containerResource)
+        throws IOException {
+    }
+
+    @Override
+    public void postExecute(ContainerId containerId) {
+      postExecContainers.add(containerId);
+    }
+
+    @Override
+    public String getResourcesOption(ContainerId containerId) {
+      return null;
+    }
+  }
 }

Reply via email to