[ https://issues.apache.org/jira/browse/YARN-11672?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17839590#comment-17839590 ]
ASF GitHub Bot commented on YARN-11672: --------------------------------------- tomicooler commented on code in PR #6734: URL: https://github.com/apache/hadoop/pull/6734#discussion_r1574502560 ########## hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java: ########## @@ -0,0 +1,576 @@ +/* + * * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * / + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public abstract class AbstractCGroupsHandler implements CGroupsHandler { + private static final Logger LOG = + LoggerFactory.getLogger(AbstractCGroupsHandler.class); + protected static final String MTAB_FILE = "/proc/mounts"; + + private final long deleteCGroupTimeout; + private final long deleteCGroupDelay; + private final Clock clock; + + protected final String mtabFile; + protected final CGroupsMountConfig cGroupsMountConfig; + protected final ReadWriteLock rwLock; + protected Map<CGroupController, String> controllerPaths; + protected Map<String, Set<String>> parsedMtab; + protected final PrivilegedOperationExecutor privilegedOperationExecutor; + protected final String cGroupPrefix; + + /** + * Create cgroup handler object. + * + * @param conf configuration + * @param privilegedOperationExecutor provides mechanisms to execute + * PrivilegedContainerOperations + * @param mtab mount file location + * @throws ResourceHandlerException if initialization failed + */ + AbstractCGroupsHandler(Configuration conf, PrivilegedOperationExecutor + privilegedOperationExecutor, String mtab) + throws ResourceHandlerException { + // Remove leading and trialing slash(es) + this.cGroupPrefix = conf.get(YarnConfiguration. + NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn") + .replaceAll("^/+", "").replaceAll("/+$", ""); + this.cGroupsMountConfig = new CGroupsMountConfig(conf); + this.deleteCGroupTimeout = conf.getLong( + YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT, + YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT) + + conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, + YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + 1000; + this.deleteCGroupDelay = + conf.getLong(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_DELAY, + YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_DELAY); + this.controllerPaths = new HashMap<>(); + this.parsedMtab = new HashMap<>(); + this.rwLock = new ReentrantReadWriteLock(); + this.privilegedOperationExecutor = privilegedOperationExecutor; + this.clock = SystemClock.getInstance(); + mtabFile = mtab; + init(); + } + + protected void init() throws ResourceHandlerException { + initializeControllerPaths(); + } + + @Override + public String getControllerPath(CGroupController controller) { + rwLock.readLock().lock(); + try { + return controllerPaths.get(controller); + } finally { + rwLock.readLock().unlock(); + } + } + + private void initializeControllerPaths() throws ResourceHandlerException { + // Cluster admins may have some subsystems mounted in specific locations + // We'll attempt to figure out mount points. We do this even if we plan + // to mount cgroups into our own tree to control the path permissions or + // to mount subsystems that are not mounted previously. + // The subsystems for new and existing mount points have to match, and + // the same hierarchy will be mounted at each mount point with the same + // subsystem set. + + Map<String, Set<String>> newMtab = null; + Map<CGroupController, String> cPaths; + try { + if (this.cGroupsMountConfig.mountDisabledButMountPathDefined()) { + newMtab = parsePreConfiguredMountPath(); + } + + if (newMtab == null) { + // parse mtab + newMtab = parseMtab(mtabFile); + } + + // find cgroup controller paths + cPaths = initializeControllerPathsFromMtab(newMtab); + } catch (IOException e) { + LOG.warn("Failed to initialize controller paths! Exception: " + e); + throw new ResourceHandlerException( + "Failed to initialize controller paths!"); + } + + // we want to do a bulk update without the paths changing concurrently + rwLock.writeLock().lock(); + try { + controllerPaths = cPaths; + parsedMtab = newMtab; + } finally { + rwLock.writeLock().unlock(); + } + } + + protected abstract Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException; + + protected Map<CGroupController, String> initializeControllerPathsFromMtab( + Map<String, Set<String>> mtab) { + Map<CGroupController, String> ret = new HashMap<>(); + + for (CGroupController controller : getCGroupControllers()) { + String subsystemName = controller.getName(); + String controllerPath = findControllerInMtab(subsystemName, mtab); + + if (controllerPath != null) { + ret.put(controller, controllerPath); + } + } + return ret; + } + + protected abstract List<CGroupController> getCGroupControllers(); + + /* We are looking for entries of the form: + * none /cgroup/path/mem cgroup rw,memory 0 0 + * + * Use a simple pattern that splits on the five spaces, and + * grabs the 2, 3, and 4th fields. + */ + + private static final Pattern MTAB_FILE_FORMAT = Pattern.compile( + "^[^\\s]+\\s([^\\s]+)\\s([^\\s]+)\\s([^\\s]+)\\s[^\\s]+\\s[^\\s]+$"); + + /* + * Returns a map: path -> mount options + * for mounts with type "cgroup". Cgroup controllers will + * appear in the list of options for a path. + */ + protected Map<String, Set<String>> parseMtab(String mtab) + throws IOException { + Map<String, Set<String>> ret = new HashMap<>(); + BufferedReader in = null; + + try { + FileInputStream fis = new FileInputStream(mtab); + in = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); + + for (String str = in.readLine(); str != null; + str = in.readLine()) { + Matcher m = MTAB_FILE_FORMAT.matcher(str); + boolean mat = m.find(); + if (mat) { + String path = m.group(1); + String type = m.group(2); + String options = m.group(3); + + Set<String> controllerSet = handleMtabEntry(path, type, options); + if (controllerSet != null) { + ret.put(path, controllerSet); + } + } + } + } catch (IOException e) { + if (Shell.LINUX) { + throw new IOException("Error while reading " + mtab, e); + } else { + // Ignore the error, if we are running on an os other than Linux + LOG.warn("Error while reading " + mtab, e); + } + } finally { + IOUtils.cleanupWithLogger(LOG, in); + } + + return ret; + } + + protected abstract Set<String> handleMtabEntry(String path, String type, String options) + throws IOException; + + /** + * Find the hierarchy of the subsystem. + * The kernel ensures that a subsystem can only be part of a single hierarchy. + * The subsystem can be part of multiple mount points, if they belong to the + * same hierarchy. + * + * @param controller subsystem like cpu, cpuset, etc... + * @param entries map of paths to mount options + * @return the first mount path that has the requested subsystem + */ + protected String findControllerInMtab(String controller, + Map<String, Set<String>> entries) { + for (Map.Entry<String, Set<String>> e : entries.entrySet()) { + if (e.getValue().contains(controller)) { + if (new File(e.getKey()).canRead()) { + return e.getKey(); + } else { + LOG.warn(String.format( + "Skipping inaccessible cgroup mount point %s", e.getKey())); + } + } + } + + return null; + } + + protected abstract void mountCGroupController(CGroupController controller) + throws ResourceHandlerException; + + @Override + public String getRelativePathForCGroup(String cGroupId) { + return cGroupPrefix + Path.SEPARATOR + cGroupId; + } + + @Override + public String getPathForCGroup(CGroupController controller, String cGroupId) { + return getControllerPath(controller) + Path.SEPARATOR + cGroupPrefix + + Path.SEPARATOR + cGroupId; + } + + @Override + public String getPathForCGroupTasks(CGroupController controller, + String cGroupId) { + return getPathForCGroup(controller, cGroupId) + + Path.SEPARATOR + CGROUP_PROCS_FILE; + } + + @Override + public String getPathForCGroupParam(CGroupController controller, + String cGroupId, String param) { + return getPathForCGroup(controller, cGroupId) + + Path.SEPARATOR + controller.getName() + + "." + param; + } + + /** + * Mount cgroup or use existing mount point based on configuration. + * + * @param controller - the controller being initialized + * @throws ResourceHandlerException yarn hierarchy cannot be created or + * accessed for any reason + */ + @Override + public void initializeCGroupController(CGroupController controller) throws + ResourceHandlerException { + if (this.cGroupsMountConfig.isMountEnabled() && + cGroupsMountConfig.ensureMountPathIsDefined()) { + // We have a controller that needs to be mounted + mountCGroupController(controller); + } + + // We are working with a pre-mounted contoller + // Make sure that YARN cgroup hierarchy path exists + initializePreMountedCGroupController(controller); + } + + /** + * This function is called when the administrator opted + * to use a pre-mounted cgroup controller. + * There are two options. + * 1. YARN hierarchy already exists. We verify, whether we have write access + * in this case. + * 2. YARN hierarchy does not exist, yet. We create it in this case. If cgroup v2 is used + * an additional step is required to update the cgroup.subtree_control file, see + * {@link CGroupsV2HandlerImpl#updateEnabledControllersInHierarchy} + * + * @param controller the controller being initialized + * @throws ResourceHandlerException yarn hierarchy cannot be created or + * accessed for any reason + */ + private void initializePreMountedCGroupController(CGroupController controller) + throws ResourceHandlerException { + // Check permissions to cgroup hierarchy and + // create YARN cgroup if it does not exist, yet + String controllerPath = getControllerPath(controller); + + if (controllerPath == null) { + throw new ResourceHandlerException( + String.format("Controller %s not mounted." + + " You either need to mount it with %s" + + " or mount cgroups before launching Yarn", + controller.getName(), YarnConfiguration. + NM_LINUX_CONTAINER_CGROUPS_MOUNT)); + } + + File rootHierarchy = new File(controllerPath); + File yarnHierarchy = new File(rootHierarchy, cGroupPrefix); + String subsystemName = controller.getName(); + + LOG.info("Initializing mounted controller " + controller.getName() + " " + + "at " + yarnHierarchy); + + if (!rootHierarchy.exists()) { + throw new ResourceHandlerException(getErrorWithDetails( + "Cgroups mount point does not exist or not accessible", + subsystemName, + rootHierarchy.getAbsolutePath() + )); + } else if (!yarnHierarchy.exists()) { + LOG.info("Yarn control group does not exist. Creating " + + yarnHierarchy.getAbsolutePath()); + try { + if (!yarnHierarchy.mkdir()) { + // Unexpected: we just checked that it was missing + throw new ResourceHandlerException(getErrorWithDetails( + "Unexpected: Cannot create yarn cgroup", + subsystemName, + yarnHierarchy.getAbsolutePath() + )); + } + } catch (SecurityException e) { + throw new ResourceHandlerException(getErrorWithDetails( + "No permissions to create yarn cgroup", + subsystemName, + yarnHierarchy.getAbsolutePath() + ), e); + } + } else if (!FileUtil.canWrite(yarnHierarchy)) { + throw new ResourceHandlerException(getErrorWithDetails( + "Yarn control group not writable", + subsystemName, + yarnHierarchy.getAbsolutePath() + )); + } + + try { + updateEnabledControllersInHierarchy(yarnHierarchy); Review Comment: We have a chain of `ResourceHandler`(s) (`cpu`, `io`, ...) and the `.boostrap()` is called on each of them, where they call the `initializeCGroupController` with the actual controller (e.g. `cpu`) which then ends up here. But for cgroup v2 we initialise all valid cgroupv2 controller **at once** regardless of the configuration. ``` private static void initializeConfiguredResourceHandlerChain( Configuration conf, Context nmContext) throws ResourceHandlerException { ArrayList<ResourceHandler> handlerList = new ArrayList<>(); addHandlerIfNotNull(handlerList, initNetworkResourceHandler(conf)); addHandlerIfNotNull(handlerList, initDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, initMemoryResourceHandler(conf)); addHandlerIfNotNull(handlerList, initCGroupsCpuResourceHandler(conf)); addHandlerIfNotNull(handlerList, getNumaResourceHandler(conf, nmContext)); addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext); resourceHandlerChain = new ResourceHandlerChain(handlerList); } ``` Shouldn't the `updateEnabledControllersInHierarchy` be called only for a valid cgroupv2 controller and only enable that controller (if it is not already enabled)? ``` if (controller.isInV2()) { updateEnabledControllersInHierarchy(yarnHierarchy, controller); } ``` Since this is an abstract class, the if shouldn't really be here but in the v2 implementation. This is just an example. > Create a CgroupHandler implementation for cgroup v2 > --------------------------------------------------- > > Key: YARN-11672 > URL: https://issues.apache.org/jira/browse/YARN-11672 > Project: Hadoop YARN > Issue Type: Sub-task > Reporter: Benjamin Teke > Assignee: Benjamin Teke > Priority: Major > Labels: pull-request-available > > [CGroupsHandler's|https://github.com/apache/hadoop/blob/69b328943edf2f61c8fc139934420e3f10bf3813/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java#L36] > current implementation holds the functionality to mount and setup the YARN > specific cgroup v1 functionality. A similar v2 implementation should be > created that allows initialising the v2 structure. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org