YARN-6739. Crash NM at start time if oversubscription is on but LinuxContainerExecutor or cgroup is off. Contributed by Haibo Chen.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3816007b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3816007b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3816007b Branch: refs/heads/YARN-1011 Commit: 3816007be5f447957706e7fb3a8cfd7047147bcf Parents: cc40e3a Author: Miklos Szegedi <szege...@apache.org> Authored: Mon Apr 2 15:09:52 2018 -0700 Committer: Haibo Chen <haiboc...@apache.org> Committed: Tue Jun 5 10:46:37 2018 -0700 ---------------------------------------------------------------------- .../monitor/ContainersMonitorImpl.java | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/3816007b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 946ef13..a133117 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting; import 
com.google.common.base.Preconditions; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.slf4j.Logger; @@ -219,6 +220,7 @@ public class ContainersMonitorImpl extends AbstractService implements initializeOverAllocation(conf); if (context.isOverAllocationEnabled()) { + checkOverAllocationPrerequisites(); pmemCheckEnabled = true; LOG.info("Force enabling physical memory checks because " + "overallocation is enabled"); @@ -258,6 +260,29 @@ public class ContainersMonitorImpl extends AbstractService implements super.serviceInit(this.conf); } + /** + * Check all prerequisites for NM over-allocation. + */ + private void checkOverAllocationPrerequisites() throws YarnException { + // LinuxContainerExecutor is required to enable overallocation + if (!(containerExecutor instanceof LinuxContainerExecutor)) { + throw new YarnException(LinuxContainerExecutor.class.getName() + + " is required for overallocation"); + } + if (ResourceHandlerModule.getCGroupsHandler() == null) { + throw new YarnException("CGroups must be enabled to support" + + " overallocation"); + } + if (ResourceHandlerModule.getCpuResourceHandler() == null) { + throw new YarnException( + "CGroups cpu isolation must be enabled to support overallocation"); + } + if (ResourceHandlerModule.getMemoryResourceHandler() == null) { + throw new YarnException( + "CGroups memory isolation must be enabled for overallocation"); + } + } + private boolean isContainerMonitorEnabled() { return conf.getBoolean(YarnConfiguration.NM_CONTAINER_MONITOR_ENABLED, YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED); 
--------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org