YARN-6670 Add separate NM overallocation thresholds for cpu and memory (Haibo 
Chen)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/31a732ee
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/31a732ee
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/31a732ee

Branch: refs/heads/YARN-1011
Commit: 31a732ee515a243a086046502e7086adb7ca8d52
Parents: ab91ebd
Author: Haibo Chen <haiboc...@apache.org>
Authored: Mon Jul 10 09:55:42 2017 -0700
Committer: Haibo Chen <haiboc...@apache.org>
Committed: Sun Jan 28 17:43:50 2018 -0800

----------------------------------------------------------------------
 .../hadoop/yarn/conf/YarnConfiguration.java     | 36 +++++++++--
 .../src/main/resources/yarn-default.xml         | 42 ++++++++++--
 .../server/api/records/ResourceThresholds.java  | 11 +++-
 .../monitor/ContainersMonitorImpl.java          | 67 +++++++++++++++-----
 4 files changed, 124 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index d8f4d34..ded0c1f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1949,17 +1949,39 @@ public class YarnConfiguration extends Configuration {
   public static final long DEFAULT_RM_APPLICATION_MONITOR_INTERVAL_MS =
       3000;
 
-  /** Overallocation (= allocation based on utilization) configs. */
-  public static final String NM_OVERALLOCATION_ALLOCATION_THRESHOLD =
-      NM_PREFIX + "overallocation.allocation-threshold";
-  public static final float DEFAULT_NM_OVERALLOCATION_ALLOCATION_THRESHOLD
-      = 0f;
+  /**
+   * General overallocation threshold if no resource-type-specific
+   * threshold is provided.
+   */
+  public static final String NM_OVERALLOCATION_GENERAL_THRESHOLD =
+      NM_PREFIX + "overallocation.general-utilization-threshold";
+  public static final float
+      DEFAULT_NM_OVERALLOCATION_GENERAL_THRESHOLD = -1.0f;
+  /**
+   * The maximum value of utilization threshold for all resource types
+   * up to which the scheduler allocates OPPORTUNISTIC containers.
+   */
   @Private
-  public static final float MAX_NM_OVERALLOCATION_ALLOCATION_THRESHOLD = 0.95f;
+  public static final float MAX_NM_OVERALLOCATION_THRESHOLD = 0.95f;
+
+  /**
+   * NM CPU utilization threshold up to which the scheduler allocates
+   * OPPORTUNISTIC containers after the node's capacity is fully allocated.
+   */
+  public static final String NM_OVERALLOCATION_CPU_UTILIZATION_THRESHOLD =
+      NM_PREFIX + "overallocation.cpu-utilization-threshold";
+
+  /**
+   * NM memory utilization threshold up to which the scheduler allocates
+   * OPPORTUNISTIC containers after the node's capacity is fully allocated.
+   */
+  public static final String NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD =
+      NM_PREFIX + "overallocation.memory-utilization-threshold";
+
   public static final String NM_OVERALLOCATION_PREEMPTION_THRESHOLD =
       NM_PREFIX + "overallocation.preemption-threshold";
   public static final float DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD
-      = 0f;
+      = 0.96f;
 
   /**
    * Interval of time the linux container executor should try cleaning up

http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index fc5430a..27e1996 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1669,14 +1669,44 @@
 
   <property>
     <description>The extent of over-allocation (container-allocation based on
+      current utilization instead of prior allocation) allowed on this node that
+      applies to all resource types (expressed as a float between 0 and 0.95).
+      By default, over-allocation is turned off (value = -1). When turned on,
+      the node allows running OPPORTUNISTIC containers when the aggregate
+      utilization for each resource type is under the value specified here
+      multiplied by the node's advertised capacity. Note for each resource type,
+      it can be overridden by the type specific setting:
+      yarn.nodemanager.overallocation.cpu-utilization-threshold for CPU
+      yarn.nodemanager.overallocation.memory-utilization-threshold for memory
+    </description>
+    <name>yarn.nodemanager.overallocation.general-utilization-threshold</name>
+    <value>-1.0</value>
+  </property>
+
+  <property>
+    <description>The extent of over-allocation (container-allocation based on
       current utilization instead of prior allocation) allowed on this node,
-      expressed as a float between 0 and 0.95. By default, over-allocation is
-      turned off (value = 0). When turned on, the node allows running
-      OPPORTUNISTIC containers when the aggregate utilization is under the
+      in terms of the percentage of overall NM memory capacity utilized (
+      expressed as a float between 0 and 0.95). By default, over-allocation is
+      turned off (value = -1). When turned on, the node allows running
+      OPPORTUNISTIC containers only when the aggregate utilization is under the
       value specified here multiplied by the node's advertised capacity.
     </description>
-    <name>yarn.nodemanager.overallocation.allocation-threshold</name>
-    <value>0f</value>
+    <name>yarn.nodemanager.overallocation.memory-utilization-threshold</name>
+    <value>${yarn.nodemanager.overallocation.general-utilization-threshold}</value>
+  </property>
+
+  <property>
+    <description>The extent of over-allocation (container-allocation based on
+      current utilization instead of prior allocation) allowed on this node,
+      in terms of the percentage of overall NM CPU capacity utilized (
+      expressed as a float between 0 and 0.95). By default, over-allocation is
+      turned off (value = -1). When turned on, the node allows running
+      OPPORTUNISTIC containers only when the aggregate utilization is under the
+      value specified here multiplied by the node's advertised capacity.
+    </description>
+    <name>yarn.nodemanager.overallocation.cpu-utilization-threshold</name>
+    <value>${yarn.nodemanager.overallocation.general-utilization-threshold}</value>
   </property>
 
   <property>
@@ -1685,7 +1715,7 @@
       beyond which OPPORTUNISTIC containers should start getting preempted.
     </description>
     <name>yarn.nodemanager.overallocation.preemption-threshold</name>
-    <value>1</value>
+    <value>0.96</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java
index d57706a..c81e405 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java
@@ -28,10 +28,15 @@ import org.apache.hadoop.yarn.server.api.records.impl.pb.ResourceThresholdsPBImp
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
 public abstract class ResourceThresholds {
-  public static ResourceThresholds newInstance(float threshold) {
+  public static ResourceThresholds newInstance(float overallThreshold) {
+    return newInstance(overallThreshold, overallThreshold);
+  }
+
+  public static ResourceThresholds newInstance(float cpuThreshold,
+      float memoryThreshold) {
     ResourceThresholds thresholds = new ResourceThresholdsPBImpl();
-    thresholds.setMemoryThreshold(threshold);
-    thresholds.setCpuThreshold(threshold);
+    thresholds.setCpuThreshold(cpuThreshold);
+    thresholds.setMemoryThreshold(memoryThreshold);
     return thresholds;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index a8e29bb..65091f7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -240,25 +240,60 @@ public class ContainersMonitorImpl extends AbstractService implements
   }
 
   private void initializeOverAllocation(Configuration conf) {
-    float overAllocationTreshold = conf.getFloat(
-        YarnConfiguration.NM_OVERALLOCATION_ALLOCATION_THRESHOLD,
-        YarnConfiguration.DEFAULT_NM_OVERALLOCATION_ALLOCATION_THRESHOLD);
-    overAllocationTreshold = Math.min(overAllocationTreshold,
-        YarnConfiguration.MAX_NM_OVERALLOCATION_ALLOCATION_THRESHOLD);
-    overAllocationTreshold = Math.max(0, overAllocationTreshold);
-
-    if (overAllocationTreshold > 0f) {
-      ((NodeManager.NMContext) context).setOverAllocationInfo(
-          OverAllocationInfo.newInstance(
-              ResourceThresholds.newInstance(overAllocationTreshold)));
+    float generalResourceOverAllocationThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_GENERAL_THRESHOLD,
+        YarnConfiguration.DEFAULT_NM_OVERALLOCATION_GENERAL_THRESHOLD);
+
+    float overAllocationMemoryUtilizationThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD,
+        generalResourceOverAllocationThreshold);
+    overAllocationMemoryUtilizationThreshold = Math.min(
+        overAllocationMemoryUtilizationThreshold,
+        YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD);
+    if (overAllocationMemoryUtilizationThreshold <= 0) {
+      LOG.info("NodeManager oversubscription is disabled because the memory " +
+          "utilization threshold is no larger than zero.");
+      return;
+    }
 
-      float preemptionThreshold = conf.getFloat(
-          YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD,
-          YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD);
+    float overAllocationCpuUtilizationThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_CPU_UTILIZATION_THRESHOLD,
+        generalResourceOverAllocationThreshold);
+    overAllocationCpuUtilizationThreshold = Math.min(
+        overAllocationCpuUtilizationThreshold,
+        YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD);
+    if (overAllocationCpuUtilizationThreshold <= 0) {
+      LOG.info("NodeManager oversubscription is disabled because the CPU " +
+          "utilization threshold is no larger than zero.");
+      return;
+    }
 
-      this.overAllocationPreemptionThresholds =
-          ResourceThresholds.newInstance(preemptionThreshold);
+    float preemptionThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD,
+        YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD);
+    if (preemptionThreshold <= overAllocationCpuUtilizationThreshold) {
+      LOG.info("NodeManager oversubscription is disabled because preemption" +
+          "threshold is no larger than the cpu utilization threshold.");
+      return;
+    }
+    if (preemptionThreshold <= overAllocationMemoryUtilizationThreshold) {
+      LOG.info("NodeManager oversubscription is disabled because preemption" +
+          "threshold is no larger than the memory utilization threshold.");
+      return;
     }
+
+    ResourceThresholds resourceThresholds = ResourceThresholds.newInstance(
+        overAllocationCpuUtilizationThreshold,
+        overAllocationMemoryUtilizationThreshold);
+    ((NodeManager.NMContext) context).setOverAllocationInfo(
+        OverAllocationInfo.newInstance(resourceThresholds));
+    this.overAllocationPreemptionThresholds =
+        ResourceThresholds.newInstance(preemptionThreshold);
+
+    LOG.info("NodeManager oversubscription enabled with overallocation " +
+        "thresholds (memory:" + overAllocationMemoryUtilizationThreshold +
+        ", CPU:" + overAllocationCpuUtilizationThreshold + ") and preemption" +
+        " threshold: " + preemptionThreshold);
   }
 
   private boolean isResourceCalculatorAvailable() {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to