YARN-6705 Add separate NM preemption thresholds for cpu and memory  (Haibo Chen)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e5a996e8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e5a996e8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e5a996e8

Branch: refs/heads/YARN-1011
Commit: e5a996e85778e08d80464d998c40b2493317314a
Parents: 85a6887
Author: Haibo Chen <haiboc...@apache.org>
Authored: Wed Jul 12 12:32:13 2017 -0700
Committer: Haibo Chen <haiboc...@apache.org>
Committed: Thu Oct 19 21:26:14 2017 -0700

----------------------------------------------------------------------
 .../hadoop/yarn/conf/YarnConfiguration.java     | 31 +++++++++++++--
 .../src/main/resources/yarn-default.xml         | 34 ++++++++++++++--
 .../monitor/ContainersMonitorImpl.java          | 42 +++++++++++++-------
 3 files changed, 85 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index f312f24..c8ab62a 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1860,10 +1860,33 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD =
       NM_PREFIX + "overallocation.memory-utilization-threshold";
 
-  public static final String NM_OVERALLOCATION_PREEMPTION_THRESHOLD =
-      NM_PREFIX + "overallocation.preemption-threshold";
-  public static final float DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD
-      = 0.96f;
+  /**
+   * The CPU utilization threshold which, if exceeded a few times in a row,
+   * causes OPPORTUNISTIC containers started due to overallocation to start
+   * getting preempted.
+   */
+  public static final String NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD =
+      NM_PREFIX + "overallocation.preemption-threshold.cpu";
+  public static final float
+      DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD = 0.99f;
+
+  /**
+   * The number of times that CPU utilization must go over the CPU preemption
+   * threshold consecutively before preemption starts to kick in.
+   */
+  public static final String NM_OVERALLOCATION_PREEMPTION_CPU_COUNT =
+      NM_PREFIX + "overallocation.preemption-threshold-count.cpu";
+  public static final int DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT = 4;
+
+
+  /**
+   * The memory utilization threshold beyond which OPPORTUNISTIC containers
+   * started due to overallocation should start getting preempted.
+   */
+  public static final String NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD =
+      NM_PREFIX + "overallocation.preemption-threshold.memory";
+  public static final float
+      DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD = 0.95f;
 
   /**
    * Interval of time the linux container executor should try cleaning up

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 3a98060..9e97ddf 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1645,11 +1645,37 @@
 
   <property>
     <description>When a node is over-allocated to improve utilization by
-      running OPPORTUNISTIC containers, this config captures the utilization
-      beyond which OPPORTUNISTIC containers should start getting preempted.
+      running OPPORTUNISTIC containers, this config captures the CPU
+      utilization beyond which OPPORTUNISTIC containers should start getting
+      preempted. This is used in combination with
+      yarn.nodemanager.overallocation.preemption-threshold-count.cpu, that is,
+      only when the CPU utilization goes over this threshold consecutively for
+      a few times will preemption kick in.
     </description>
-    <name>yarn.nodemanager.overallocation.preemption-threshold</name>
-    <value>0.96</value>
+    <name>yarn.nodemanager.overallocation.preemption-threshold.cpu</name>
+    <value>0.99</value>
+  </property>
+
+  <property>
+    <description>When a node is over-allocated to improve utilization by
+      running OPPORTUNISTIC containers, this config captures the number of
+      times that CPU utilization has to go above
+      ${yarn.nodemanager.overallocation.preemption-threshold.cpu}
+      consecutively for NM to start preempting OPPORTUNISTIC containers
+      started due to overallocation.
+    </description>
+    <name>yarn.nodemanager.overallocation.preemption-threshold-count.cpu</name>
+    <value>4</value>
+  </property>
+
+  <property>
+    <description>When a node is over-allocated to improve utilization by
+      running OPPORTUNISTIC containers, this config captures the memory
+      utilization beyond which OPPORTUNISTIC containers should start getting
+      preempted.
+    </description>
+    <name>yarn.nodemanager.overallocation.preemption-threshold.memory</name>
+    <value>0.95</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index 941997e..2c1e088 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -103,6 +103,7 @@ public class ContainersMonitorImpl extends AbstractService 
implements
   private ResourceUtilization containersUtilization;
 
   private ResourceThresholds overAllocationPreemptionThresholds;
+  private int overAlloctionPreemptionCpuCount = -1;
 
   private volatile boolean stopped = false;
 
@@ -237,7 +238,7 @@ public class ContainersMonitorImpl extends AbstractService 
implements
         YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD);
     if (overAllocationMemoryUtilizationThreshold <= 0) {
       LOG.info("NodeManager oversubscription is disabled because the memory " +
-          "utilization threshold is no larger than zero.");
+          "overallocation threshold is no larger than zero.");
       return;
     }
 
@@ -249,36 +250,49 @@ public class ContainersMonitorImpl extends 
AbstractService implements
         YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD);
     if (overAllocationCpuUtilizationThreshold <= 0) {
       LOG.info("NodeManager oversubscription is disabled because the CPU " +
-          "utilization threshold is no larger than zero.");
+          "overallocation threshold is no larger than zero.");
       return;
     }
 
-    float preemptionThreshold = conf.getFloat(
-        YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD,
-        YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD);
-    if (preemptionThreshold <= overAllocationCpuUtilizationThreshold) {
-      LOG.info("NodeManager oversubscription is disabled because preemption" +
-          "threshold is no larger than the cpu utilization threshold.");
+    float cpuPreemptionThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD,
+        YarnConfiguration.
+            DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD);
+    if (cpuPreemptionThreshold <= overAllocationCpuUtilizationThreshold) {
+      LOG.info("NodeManager oversubscription is disabled because the cpu " +
+          " preemption threshold is no larger than the cpu overallocation" +
+          " threshold.");
       return;
     }
-    if (preemptionThreshold <= overAllocationMemoryUtilizationThreshold) {
-      LOG.info("NodeManager oversubscription is disabled because preemption" +
-          "threshold is no larger than the memory utilization threshold.");
+
+    float memoryPreemptionThreshold = conf.getFloat(
+        YarnConfiguration.NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD,
+        YarnConfiguration.
+            DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD);
+    if (memoryPreemptionThreshold <= overAllocationMemoryUtilizationThreshold) 
{
+      LOG.info("NodeManager oversubscription is disabled because the memory" +
+          " preemption threshold is no larger than the memory overallocation" +
+          " threshold.");
       return;
     }
 
+    this.overAlloctionPreemptionCpuCount = conf.getInt(
+        YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_CPU_COUNT,
+        YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT);
+
     ResourceThresholds resourceThresholds = ResourceThresholds.newInstance(
         overAllocationCpuUtilizationThreshold,
         overAllocationMemoryUtilizationThreshold);
     ((NodeManager.NMContext) context).setOverAllocationInfo(
         OverAllocationInfo.newInstance(resourceThresholds));
-    this.overAllocationPreemptionThresholds =
-        ResourceThresholds.newInstance(preemptionThreshold);
+    this.overAllocationPreemptionThresholds = ResourceThresholds.newInstance(
+        cpuPreemptionThreshold, memoryPreemptionThreshold);
 
     LOG.info("NodeManager oversubscription enabled with overallocation " +
         "thresholds (memory:" + overAllocationMemoryUtilizationThreshold +
         ", CPU:" + overAllocationCpuUtilizationThreshold + ") and preemption" +
-        " threshold: " + preemptionThreshold);
+        " threshold (memory:" + memoryPreemptionThreshold + ", CPU:" +
+        cpuPreemptionThreshold + ")");
   }
 
   private boolean isResourceCalculatorAvailable() {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to