YARN-6670 Add separate NM overallocation thresholds for CPU and memory (Haibo Chen)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/31a732ee Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/31a732ee Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/31a732ee Branch: refs/heads/YARN-1011 Commit: 31a732ee515a243a086046502e7086adb7ca8d52 Parents: ab91ebd Author: Haibo Chen <haiboc...@apache.org> Authored: Mon Jul 10 09:55:42 2017 -0700 Committer: Haibo Chen <haiboc...@apache.org> Committed: Sun Jan 28 17:43:50 2018 -0800 ---------------------------------------------------------------------- .../hadoop/yarn/conf/YarnConfiguration.java | 36 +++++++++-- .../src/main/resources/yarn-default.xml | 42 ++++++++++-- .../server/api/records/ResourceThresholds.java | 11 +++- .../monitor/ContainersMonitorImpl.java | 67 +++++++++++++++----- 4 files changed, 124 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index d8f4d34..ded0c1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1949,17 +1949,39 @@ public class YarnConfiguration extends Configuration { public static final long DEFAULT_RM_APPLICATION_MONITOR_INTERVAL_MS = 3000; - /** Overallocation (= allocation based on utilization) configs. 
*/ - public static final String NM_OVERALLOCATION_ALLOCATION_THRESHOLD = - NM_PREFIX + "overallocation.allocation-threshold"; - public static final float DEFAULT_NM_OVERALLOCATION_ALLOCATION_THRESHOLD - = 0f; + /** + * General overallocation threshold if no resource-type-specific + * threshold is provided. + */ + public static final String NM_OVERALLOCATION_GENERAL_THRESHOLD = + NM_PREFIX + "overallocation.general-utilization-threshold"; + public static final float + DEFAULT_NM_OVERALLOCATION_GENERAL_THRESHOLD = -1.0f; + /** + * The maximum value of utilization threshold for all resource types + * up to which the scheduler allocates OPPORTUNISTIC containers. + */ @Private - public static final float MAX_NM_OVERALLOCATION_ALLOCATION_THRESHOLD = 0.95f; + public static final float MAX_NM_OVERALLOCATION_THRESHOLD = 0.95f; + + /** + * NM CPU utilization threshold up to which the scheduler allocates + * OPPORTUNISTIC containers after the node's capacity is fully allocated. + */ + public static final String NM_OVERALLOCATION_CPU_UTILIZATION_THRESHOLD = + NM_PREFIX + "overallocation.cpu-utilization-threshold"; + + /** + * NM memory utilization threshold up to which the scheduler allocates + * OPPORTUNISTIC containers after the node's capacity is fully allocated. 
+ */ + public static final String NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD = + NM_PREFIX + "overallocation.memory-utilization-threshold"; + public static final String NM_OVERALLOCATION_PREEMPTION_THRESHOLD = NM_PREFIX + "overallocation.preemption-threshold"; public static final float DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD - = 0f; + = 0.96f; /** * Interval of time the linux container executor should try cleaning up http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index fc5430a..27e1996 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1669,14 +1669,44 @@ <property> <description>The extent of over-allocation (container-allocation based on + current utilization instead of prior allocation) allowed on this node that + applies to all resource types (expressed as a float between 0 and 0.95). + By default, over-allocation is turned off (value = -1). When turned on, + the node allows running OPPORTUNISTIC containers when the aggregate + utilization for each resource type is under the value specified here + multiplied by the node's advertised capacity. 
Note for each resource type, + it can be overridden by the type specific setting: + yarn.nodemanager.overallocation.cpu-utilization-threshold for CPU + yarn.nodemanager.overallocation.memory-utilization-threshold for memory + </description> + <name>yarn.nodemanager.overallocation.general-utilization-threshold</name> + <value>-1.0</value> + </property> + + <property> + <description>The extent of over-allocation (container-allocation based on current utilization instead of prior allocation) allowed on this node, - expressed as a float between 0 and 0.95. By default, over-allocation is - turned off (value = 0). When turned on, the node allows running - OPPORTUNISTIC containers when the aggregate utilization is under the + in terms of the percentage of overall NM memory capacity utilized ( + expressed as a float between 0 and 0.95). By default, over-allocation is + turned off (value = -1). When turned on, the node allows running + OPPORTUNISTIC containers only when the aggregate utilization is under the value specified here multiplied by the node's advertised capacity. </description> - <name>yarn.nodemanager.overallocation.allocation-threshold</name> - <value>0f</value> + <name>yarn.nodemanager.overallocation.memory-utilization-threshold</name> + <value>${yarn.nodemanager.overallocation.general-utilization-threshold}</value> + </property> + + <property> + <description>The extent of over-allocation (container-allocation based on + current utilization instead of prior allocation) allowed on this node, + in terms of the percentage of overall NM CPU capacity utilized ( + expressed as a float between 0 and 0.95). By default, over-allocation is + turned off (value = -1). When turned on, the node allows running + OPPORTUNISTIC containers only when the aggregate utilization is under the + value specified here multiplied by the node's advertised capacity. 
+ </description> + <name>yarn.nodemanager.overallocation.cpu-utilization-threshold</name> + <value>${yarn.nodemanager.overallocation.general-utilization-threshold}</value> </property> <property> @@ -1685,7 +1715,7 @@ beyond which OPPORTUNISTIC containers should start getting preempted. </description> <name>yarn.nodemanager.overallocation.preemption-threshold</name> - <value>1</value> + <value>0.96</value> </property> <property> http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java index d57706a..c81e405 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/ResourceThresholds.java @@ -28,10 +28,15 @@ import org.apache.hadoop.yarn.server.api.records.impl.pb.ResourceThresholdsPBImp @InterfaceAudience.Private @InterfaceStability.Evolving public abstract class ResourceThresholds { - public static ResourceThresholds newInstance(float threshold) { + public static ResourceThresholds newInstance(float overallThreshold) { + return newInstance(overallThreshold, overallThreshold); + } + + public static ResourceThresholds newInstance(float cpuThreshold, + float memoryThreshold) { ResourceThresholds thresholds = new ResourceThresholdsPBImpl(); - 
thresholds.setMemoryThreshold(threshold); - thresholds.setCpuThreshold(threshold); + thresholds.setCpuThreshold(cpuThreshold); + thresholds.setMemoryThreshold(memoryThreshold); return thresholds; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/31a732ee/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index a8e29bb..65091f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -240,25 +240,60 @@ public class ContainersMonitorImpl extends AbstractService implements } private void initializeOverAllocation(Configuration conf) { - float overAllocationTreshold = conf.getFloat( - YarnConfiguration.NM_OVERALLOCATION_ALLOCATION_THRESHOLD, - YarnConfiguration.DEFAULT_NM_OVERALLOCATION_ALLOCATION_THRESHOLD); - overAllocationTreshold = Math.min(overAllocationTreshold, - YarnConfiguration.MAX_NM_OVERALLOCATION_ALLOCATION_THRESHOLD); - overAllocationTreshold = Math.max(0, overAllocationTreshold); - - if (overAllocationTreshold > 0f) { - ((NodeManager.NMContext) context).setOverAllocationInfo( - OverAllocationInfo.newInstance( - 
ResourceThresholds.newInstance(overAllocationTreshold))); + float generalResourceOverAllocationThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_GENERAL_THRESHOLD, + YarnConfiguration.DEFAULT_NM_OVERALLOCATION_GENERAL_THRESHOLD); + + float overAllocationMemoryUtilizationThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_MEMORY_UTILIZATION_THRESHOLD, + generalResourceOverAllocationThreshold); + overAllocationMemoryUtilizationThreshold = Math.min( + overAllocationMemoryUtilizationThreshold, + YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); + if (overAllocationMemoryUtilizationThreshold <= 0) { + LOG.info("NodeManager oversubscription is disabled because the memory " + + "utilization threshold is no larger than zero."); + return; + } - float preemptionThreshold = conf.getFloat( - YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD, - YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD); + float overAllocationCpuUtilizationThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_CPU_UTILIZATION_THRESHOLD, + generalResourceOverAllocationThreshold); + overAllocationCpuUtilizationThreshold = Math.min( + overAllocationCpuUtilizationThreshold, + YarnConfiguration.MAX_NM_OVERALLOCATION_THRESHOLD); + if (overAllocationCpuUtilizationThreshold <= 0) { + LOG.info("NodeManager oversubscription is disabled because the CPU " + + "utilization threshold is no larger than zero."); + return; + } - this.overAllocationPreemptionThresholds = - ResourceThresholds.newInstance(preemptionThreshold); + float preemptionThreshold = conf.getFloat( + YarnConfiguration.NM_OVERALLOCATION_PREEMPTION_THRESHOLD, + YarnConfiguration.DEFAULT_NM_OVERALLOCATION_PREEMPTION_THRESHOLD); + if (preemptionThreshold <= overAllocationCpuUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because preemption" + + "threshold is no larger than the cpu utilization threshold."); + return; + } + if (preemptionThreshold <= 
overAllocationMemoryUtilizationThreshold) { + LOG.info("NodeManager oversubscription is disabled because preemption" + + "threshold is no larger than the memory utilization threshold."); + return; } + + ResourceThresholds resourceThresholds = ResourceThresholds.newInstance( + overAllocationCpuUtilizationThreshold, + overAllocationMemoryUtilizationThreshold); + ((NodeManager.NMContext) context).setOverAllocationInfo( + OverAllocationInfo.newInstance(resourceThresholds)); + this.overAllocationPreemptionThresholds = + ResourceThresholds.newInstance(preemptionThreshold); + + LOG.info("NodeManager oversubscription enabled with overallocation " + + "thresholds (memory:" + overAllocationMemoryUtilizationThreshold + + ", CPU:" + overAllocationCpuUtilizationThreshold + ") and preemption" + + " threshold: " + preemptionThreshold); } private boolean isResourceCalculatorAvailable() { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org