YARN-6705 Add separate NM preemption thresholds for cpu and memory (Haibo Chen)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e5a996e8 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e5a996e8 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e5a996e8 Branch: refs/heads/YARN-1011 Commit: e5a996e85778e08d80464d998c40b2493317314a Parents: 85a6887 Author: Haibo Chen <haiboc...@apache.org> Authored: Wed Jul 12 12:32:13 2017 -0700 Committer: Haibo Chen <haiboc...@apache.org> Committed: Thu Oct 19 21:26:14 2017 -0700 ---------------------------------------------------------------------- .../hadoop/yarn/conf/YarnConfiguration.java | 31 +++++++++++++-- .../src/main/resources/yarn-default.xml | 34 ++++++++++++++-- .../monitor/ContainersMonitorImpl.java | 42 +++++++++++++------- 3 files changed, 85 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index f312f24..c8ab62a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1860,10 +1860,33 @@ public class YarnConfiguration extends Configuration { public static final String NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD = NM_PREFIX + "overallocation.memory-utilization-threshold"; - public static final String NM_OVERALLOCATION_PREEMPTION_THRESHOLD = - NM_PREFIX + 
"overallocation.preemption-threshold"; - public static final float DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD - = 0.96f; + /** + * The CPU utilization threshold, if went beyond for a few times in a row, + * OPPORTUNISTIC containers started due to overallocation should start + * getting preempted. + */ + public static final String NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD = + NM_PREFIX + "overallocation.preemption-threshold.cpu"; + public static final float + DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD = 0.99f; + + /** + * The number of times that CPU utilization must go over the CPU preemption + * threshold consecutively before preemption starts to kick in. + */ + public static final String NM_OVERALLOCATION_PREEMPTION_CPU_COUNT = + NM_PREFIX + "overallocation.preemption-threshold-count.cpu"; + public static final int DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT = 4; + + + /** + * The memory utilization threshold beyond which OPPORTUNISTIC containers + * started due to overallocation should start getting preempted. 
+ */ + public static final String NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD = + NM_PREFIX + "overallocation.preemption-threshold.memory"; + public static final float + DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD = 0.95f; /** * Interval of time the linux container executor should try cleaning up http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 3a98060..9e97ddf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1645,11 +1645,37 @@ <property> <description>When a node is over-allocated to improve utilization by - running OPPORTUNISTIC containers, this config captures the utilization - beyond which OPPORTUNISTIC containers should start getting preempted. + running OPPORTUNISTIC containers, this config captures the CPU + utilization beyond which OPPORTUNISTIC containers should start getting + preempted. This is used in combination with + yarn.nodemanager.overallocation.preemption-threshold-count.cpu, that is, + only when the CPU utilization goes over this threshold consecutively for + a few times will preemption kick in. 
</description> - <name>yarn.nodemanager.overallocation.preemption-threshold</name> - <value>0.96</value> + <name>yarn.nodemanager.overallocation.preemption-threshold.cpu</name> + <value>0.99</value> + </property> + + <property> + <description>When a node is over-allocated to improve utilization by + running OPPORTUNISTIC containers, this config captures the number of + times that CPU utilization has to go above + ${yarn.nodemanager.overallocation.preemption-threshold.cpu} + consecutively for NM to start preempting OPPORTUNISTIC containers + started due to overallocation. + </description> + <name>yarn.nodemanager.overallocation.preemption-threshold-count.cpu</name> + <value>4</value> + </property> + + <property> + <description>When a node is over-allocated to improve utilization by + running OPPORTUNISTIC containers, this config captures the memory + utilization beyond which OPPORTUNISTIC containers should start getting + preempted. + </description> + <name>yarn.nodemanager.overallocation.preemption-threshold.memory</name> + <value>0.95</value> </property> <property> http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5a996e8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 941997e..2c1e088 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -103,6 +103,7 @@ public class ContainersMonitorImpl extends AbstractService implements private ResourceUtilization containersUtilization; private ResourceThresholds overAllocationPreemptionThresholds; + private int overAlloctionPreemptionCpuCount = -1; private volatile boolean stopped = false; @@ -237,7 +238,7 @@ public class ContainersMonitorImpl extends AbstractService implements YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); if (overAllocationMemoryUtilizationThreshold <= 0) { LOG.info("NodeManager oversubscription is disabled because the memory " + - "utilization threshold is no larger than zero."); + "overallocation threshold is no larger than zero."); return; } @@ -249,36 +250,49 @@ public class ContainersMonitorImpl extends AbstractService implements YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); if (overAllocationCpuUtilizationThreshold <= 0) { LOG.info("NodeManager oversubscription is disabled because the CPU " + - "utilization threshold is no larger than zero."); + "overallocation threshold is no larger than zero."); return; } - float preemptionThreshold = conf.getFloat( - YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD, - YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD); - if (preemptionThreshold <= overAllocationCpuUtilizationThreshold) { - LOG.info("NodeManager oversubscription is disabled because preemption" + - "threshold is no larger than the cpu utilization threshold."); + float cpuPreemptionThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD, + YarnConfiguration. 
+ DEFAULT_NM_OVERALLOCATION_CPU_PREEMPTION_THRESHOLD); + if (cpuPreemptionThreshold <= overAllocationCpuUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because the cpu " + + " preemption threshold is no larger than the cpu overallocation" + + " threshold."); return; } - if (preemptionThreshold <= overAllocationMemoryUtilizationThreshold) { - LOG.info("NodeManager oversubscription is disabled because preemption" + - "threshold is no larger than the memory utilization threshold."); + + float memoryPreemptionThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD, + YarnConfiguration. + DEFAULT_NM_OVERALLOCATION_MEMORY_PREEMPTION_THRESHOLD); + if (memoryPreemptionThreshold <= overAllocationMemoryUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because the memory" + + " preemption threshold is no larger than the memory overallocation" + + " threshold."); return; } + this.overAlloctionPreemptionCpuCount = conf.getInt( + YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_CPU_COUNT, + YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_CPU_COUNT); + ResourceThresholds resourceThresholds = ResourceThresholds.newInstance( overAllocationCpuUtilizationThreshold, overAllocationMemoryUtilizationThreshold); ((NodeManager.NMContext) context).setOverAllocationInfo( OverAllocationInfo.newInstance(resourceThresholds)); - this.overAllocationPreemptionThresholds = - ResourceThresholds.newInstance(preemptionThreshold); + this.overAllocationPreemptionThresholds = ResourceThresholds.newInstance( + cpuPreemptionThreshold, memoryPreemptionThreshold); LOG.info("NodeManager oversubscription enabled with overallocation " + "thresholds (memory:" + overAllocationMemoryUtilizationThreshold + ", CPU:" + overAllocationCpuUtilizationThreshold + ") and preemption" + - " threshold: " + preemptionThreshold); + " threshold (memory:" + memoryPreemptionThreshold + ", CPU:" + + cpuPreemptionThreshold + ")"); } 
private boolean isResourceCalculatorAvailable() { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org