YARN-2531. Added a configuration for admins to be able to override app-configs 
and enforce/not-enforce strict control of per-container cpu usage. Contributed 
by Varun Vasudev.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9f6891d9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9f6891d9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9f6891d9

Branch: refs/heads/HDFS-6581
Commit: 9f6891d9ef7064d121305ca783eb62586c8aa018
Parents: 0c26412
Author: Vinod Kumar Vavilapalli <vino...@apache.org>
Authored: Tue Sep 16 10:14:46 2014 -0700
Committer: Vinod Kumar Vavilapalli <vino...@apache.org>
Committed: Tue Sep 16 10:14:46 2014 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |   4 +
 .../hadoop/yarn/conf/YarnConfiguration.java     |  10 ++
 .../src/main/resources/yarn-default.xml         |  10 ++
 .../util/CgroupsLCEResourcesHandler.java        |  29 +++-
 .../util/TestCgroupsLCEResourcesHandler.java    | 165 ++++++++++++++++---
 5 files changed, 189 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9f6891d9/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index b41ad82..66623ec 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -84,6 +84,10 @@ Release 2.6.0 - UNRELEASED
     failures should be ignored towards counting max-attempts. (Xuan Gong via
     vinodkv)
 
+    YARN-2531. Added a configuration for admins to be able to override 
app-configs
+    and enforce/not-enforce strict control of per-container cpu usage. (Varun
+    Vasudev via vinodkv)
+
   IMPROVEMENTS
 
     YARN-2197. Add a link to YARN CHANGES.txt in the left side of doc

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9f6891d9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index a92b358..44a6fc3 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -902,6 +902,16 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH =
     NM_PREFIX + "linux-container-executor.cgroups.mount-path";
 
+  /**
+   * Whether the apps should run in strict resource usage mode(not allowed to
+   * use spare CPU)
+   */
+  public static final String NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE =
+      NM_PREFIX + "linux-container-executor.cgroups.strict-resource-usage";
+  public static final boolean 
DEFAULT_NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE =
+      false;
+
+
 
   /**
    * Interval of time the linux container executor should try cleaning up

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9f6891d9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 55e5bca..3a7e94a 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1040,6 +1040,16 @@
   </property>
 
   <property>
+    <description>This flag determines whether apps should run with strict 
resource limits
+    or be allowed to consume spare resources if they need them. For example, 
turning the
+    flag on will restrict apps to use only their share of CPU, even if the 
node has spare
+    CPU cycles. The default value is false i.e. use available resources. 
Please note that
+    turning this flag on may reduce job throughput on the 
cluster.</description>
+    
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
+    <value>false</value>
+  </property>
+
+  <property>
     <description>T-file compression types used to compress aggregated 
logs.</description>
     <name>yarn.nodemanager.log-aggregation.compression-type</name>
     <value>none</value>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9f6891d9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
index 0b6c2ac..63039d8 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java
@@ -57,6 +57,7 @@ public class CgroupsLCEResourcesHandler implements 
LCEResourcesHandler {
   private String cgroupMountPath;
 
   private boolean cpuWeightEnabled = true;
+  private boolean strictResourceUsageMode = false;
 
   private final String MTAB_FILE = "/proc/mounts";
   private final String CGROUPS_FSTYPE = "cgroup";
@@ -71,6 +72,8 @@ public class CgroupsLCEResourcesHandler implements 
LCEResourcesHandler {
   private long deleteCgroupTimeout;
   // package private for testing purposes
   Clock clock;
+
+  private float yarnProcessors;
   
   public CgroupsLCEResourcesHandler() {
     this.controllerPaths = new HashMap<String, String>();
@@ -105,6 +108,12 @@ public class CgroupsLCEResourcesHandler implements 
LCEResourcesHandler {
       cgroupPrefix = cgroupPrefix.substring(1);
     }
 
+    this.strictResourceUsageMode =
+        conf
+          .getBoolean(
+            YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE,
+            
YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE);
+
     int len = cgroupPrefix.length();
     if (cgroupPrefix.charAt(len - 1) == '/') {
       cgroupPrefix = cgroupPrefix.substring(0, len - 1);
@@ -132,8 +141,7 @@ public class CgroupsLCEResourcesHandler implements 
LCEResourcesHandler {
     initializeControllerPaths();
 
     // cap overall usage to the number of cores allocated to YARN
-    float yarnProcessors =
-        NodeManagerHardwareUtils.getContainersCores(plugin, conf);
+    yarnProcessors = NodeManagerHardwareUtils.getContainersCores(plugin, conf);
     int systemProcessors = plugin.getNumProcessors();
     if (systemProcessors != (int) yarnProcessors) {
       LOG.info("YARN containers restricted to " + yarnProcessors + " cores");
@@ -290,10 +298,25 @@ public class CgroupsLCEResourcesHandler implements 
LCEResourcesHandler {
     String containerName = containerId.toString();
 
     if (isCpuWeightEnabled()) {
+      int containerVCores = containerResource.getVirtualCores();
       createCgroup(CONTROLLER_CPU, containerName);
-      int cpuShares = CPU_DEFAULT_WEIGHT * containerResource.getVirtualCores();
+      int cpuShares = CPU_DEFAULT_WEIGHT * containerVCores;
       updateCgroup(CONTROLLER_CPU, containerName, "shares",
           String.valueOf(cpuShares));
+      if (strictResourceUsageMode) {
+        int nodeVCores =
+            conf.getInt(YarnConfiguration.NM_VCORES,
+              YarnConfiguration.DEFAULT_NM_VCORES);
+        if (nodeVCores != containerVCores) {
+          float containerCPU =
+              (containerVCores * yarnProcessors) / (float) nodeVCores;
+          int[] limits = getOverallLimits(containerCPU);
+          updateCgroup(CONTROLLER_CPU, containerName, CPU_PERIOD_US,
+            String.valueOf(limits[0]));
+          updateCgroup(CONTROLLER_CPU, containerName, CPU_QUOTA_US,
+            String.valueOf(limits[1]));
+        }
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9f6891d9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java
index 4506898..de2af37 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.yarn.server.nodemanager.util;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
 import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
 import org.junit.Assert;
@@ -86,9 +88,13 @@ public class TestCgroupsLCEResourcesHandler {
 
     String mtabFile;
     int[] limits = new int[2];
+    boolean generateLimitsMode = false;
 
     @Override
     int[] getOverallLimits(float x) {
+      if (generateLimitsMode == true) {
+        return super.getOverallLimits(x);
+      }
       return limits;
     }
 
@@ -116,32 +122,11 @@ public class TestCgroupsLCEResourcesHandler {
     handler.initConfig();
 
     // create mock cgroup
-    File cgroupDir = new File("target", UUID.randomUUID().toString());
-    if (!cgroupDir.mkdir()) {
-      String message = "Could not create dir " + cgroupDir.getAbsolutePath();
-      throw new IOException(message);
-    }
-    File cgroupMountDir = new File(cgroupDir.getAbsolutePath(), "hadoop-yarn");
-    if (!cgroupMountDir.mkdir()) {
-      String message =
-          "Could not create dir " + cgroupMountDir.getAbsolutePath();
-      throw new IOException(message);
-    }
+    File cgroupDir = createMockCgroup();
+    File cgroupMountDir = createMockCgroupMount(cgroupDir);
 
     // create mock mtab
-    String mtabContent =
-        "none " + cgroupDir.getAbsolutePath() + " cgroup rw,relatime,cpu 0 0";
-    File mockMtab = new File("target", UUID.randomUUID().toString());
-    if (!mockMtab.exists()) {
-      if (!mockMtab.createNewFile()) {
-        String message = "Could not create file " + mockMtab.getAbsolutePath();
-        throw new IOException(message);
-      }
-    }
-    FileWriter mtabWriter = new FileWriter(mockMtab.getAbsoluteFile());
-    mtabWriter.write(mtabContent);
-    mtabWriter.close();
-    mockMtab.deleteOnExit();
+    File mockMtab = createMockMTab(cgroupDir);
 
     // setup our handler and call init()
     handler.setMtabFile(mockMtab.getAbsolutePath());
@@ -156,7 +141,8 @@ public class TestCgroupsLCEResourcesHandler {
     Assert.assertFalse(quotaFile.exists());
 
     // subset of cpu being used, files should be created
-    conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 
75);
+    conf
+      .setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 75);
     handler.limits[0] = 100 * 1000;
     handler.limits[1] = 1000 * 1000;
     handler.init(mockLCE, plugin);
@@ -166,7 +152,8 @@ public class TestCgroupsLCEResourcesHandler {
     Assert.assertEquals(1000 * 1000, quota);
 
     // set cpu back to 100, quota should be -1
-    conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 
100);
+    conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT,
+      100);
     handler.limits[0] = 100 * 1000;
     handler.limits[1] = 1000 * 1000;
     handler.init(mockLCE, plugin);
@@ -213,4 +200,130 @@ public class TestCgroupsLCEResourcesHandler {
     Assert.assertEquals(1000 * 1000, ret[0]);
     Assert.assertEquals(-1, ret[1]);
   }
+
+  private File createMockCgroup() throws IOException {
+    File cgroupDir = new File("target", UUID.randomUUID().toString());
+    if (!cgroupDir.mkdir()) {
+      String message = "Could not create dir " + cgroupDir.getAbsolutePath();
+      throw new IOException(message);
+    }
+    return cgroupDir;
+  }
+
+  private File createMockCgroupMount(File cgroupDir) throws IOException {
+    File cgroupMountDir = new File(cgroupDir.getAbsolutePath(), "hadoop-yarn");
+    if (!cgroupMountDir.mkdir()) {
+      String message =
+          "Could not create dir " + cgroupMountDir.getAbsolutePath();
+      throw new IOException(message);
+    }
+    return cgroupMountDir;
+  }
+
+  private File createMockMTab(File cgroupDir) throws IOException {
+    String mtabContent =
+        "none " + cgroupDir.getAbsolutePath() + " cgroup rw,relatime,cpu 0 0";
+    File mockMtab = new File("target", UUID.randomUUID().toString());
+    if (!mockMtab.exists()) {
+      if (!mockMtab.createNewFile()) {
+        String message = "Could not create file " + mockMtab.getAbsolutePath();
+        throw new IOException(message);
+      }
+    }
+    FileWriter mtabWriter = new FileWriter(mockMtab.getAbsoluteFile());
+    mtabWriter.write(mtabContent);
+    mtabWriter.close();
+    mockMtab.deleteOnExit();
+    return mockMtab;
+  }
+
+  @Test
+  public void testContainerLimits() throws IOException {
+    LinuxContainerExecutor mockLCE = new MockLinuxContainerExecutor();
+    CustomCgroupsLCEResourceHandler handler =
+        new CustomCgroupsLCEResourceHandler();
+    handler.generateLimitsMode = true;
+    YarnConfiguration conf = new YarnConfiguration();
+    final int numProcessors = 4;
+    ResourceCalculatorPlugin plugin =
+        Mockito.mock(ResourceCalculatorPlugin.class);
+    Mockito.doReturn(numProcessors).when(plugin).getNumProcessors();
+    handler.setConf(conf);
+    handler.initConfig();
+
+    // create mock cgroup
+    File cgroupDir = createMockCgroup();
+    File cgroupMountDir = createMockCgroupMount(cgroupDir);
+
+    // create mock mtab
+    File mockMtab = createMockMTab(cgroupDir);
+
+    // setup our handler and call init()
+    handler.setMtabFile(mockMtab.getAbsolutePath());
+    handler.init(mockLCE, plugin);
+
+    // check values
+    // default case - files shouldn't exist, strict mode off by default
+    ContainerId id = ContainerId.fromString("container_1_1_1_1");
+    handler.preExecute(id, Resource.newInstance(1024, 1));
+    File containerDir = new File(cgroupMountDir, id.toString());
+    Assert.assertTrue(containerDir.exists());
+    Assert.assertTrue(containerDir.isDirectory());
+    File periodFile = new File(containerDir, "cpu.cfs_period_us");
+    File quotaFile = new File(containerDir, "cpu.cfs_quota_us");
+    Assert.assertFalse(periodFile.exists());
+    Assert.assertFalse(quotaFile.exists());
+
+    // no files created because we're using all cpu
+    FileUtils.deleteQuietly(containerDir);
+    conf.setBoolean(
+      YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE, 
true);
+    handler.initConfig();
+    handler.preExecute(id,
+      Resource.newInstance(1024, YarnConfiguration.DEFAULT_NM_VCORES));
+    Assert.assertTrue(containerDir.exists());
+    Assert.assertTrue(containerDir.isDirectory());
+    periodFile = new File(containerDir, "cpu.cfs_period_us");
+    quotaFile = new File(containerDir, "cpu.cfs_quota_us");
+    Assert.assertFalse(periodFile.exists());
+    Assert.assertFalse(quotaFile.exists());
+
+    // 50% of CPU
+    FileUtils.deleteQuietly(containerDir);
+    conf.setBoolean(
+      YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE, 
true);
+    handler.initConfig();
+    handler.preExecute(id,
+      Resource.newInstance(1024, YarnConfiguration.DEFAULT_NM_VCORES / 2));
+    Assert.assertTrue(containerDir.exists());
+    Assert.assertTrue(containerDir.isDirectory());
+    periodFile = new File(containerDir, "cpu.cfs_period_us");
+    quotaFile = new File(containerDir, "cpu.cfs_quota_us");
+    Assert.assertTrue(periodFile.exists());
+    Assert.assertTrue(quotaFile.exists());
+    Assert.assertEquals(500 * 1000, readIntFromFile(periodFile));
+    Assert.assertEquals(1000 * 1000, readIntFromFile(quotaFile));
+
+    // CGroups set to 50% of CPU, container set to 50% of YARN CPU
+    FileUtils.deleteQuietly(containerDir);
+    conf.setBoolean(
+      YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE, 
true);
+    conf
+      .setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 50);
+    handler.initConfig();
+    handler.init(mockLCE, plugin);
+    handler.preExecute(id,
+      Resource.newInstance(1024, YarnConfiguration.DEFAULT_NM_VCORES / 2));
+    Assert.assertTrue(containerDir.exists());
+    Assert.assertTrue(containerDir.isDirectory());
+    periodFile = new File(containerDir, "cpu.cfs_period_us");
+    quotaFile = new File(containerDir, "cpu.cfs_quota_us");
+    Assert.assertTrue(periodFile.exists());
+    Assert.assertTrue(quotaFile.exists());
+    Assert.assertEquals(1000 * 1000, readIntFromFile(periodFile));
+    Assert.assertEquals(1000 * 1000, readIntFromFile(quotaFile));
+
+    FileUtils.deleteQuietly(cgroupDir);
+  }
+
 }

Reply via email to