bmahler commented on code in PR #556:
URL: https://github.com/apache/mesos/pull/556#discussion_r1575260114
##########
src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp:
##########
@@ -120,6 +144,622 @@ bool Cgroups2IsolatorProcess::supportsStandalone()
return true;
}
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::prepare(
+ const ContainerID& containerId,
+ const ContainerConfig& containerConfig)
+{
+ if (containerId.has_parent()) {
+ return Failure("cgroups v2 does not support nested containers");
+ }
+
+ if (infos.contains(containerId)) {
+ return Failure(
+ "Container with id '" + stringify(containerId) + "' "
+ "has already been prepared");
+ }
+
+ CHECK(containerConfig.container_class() != ContainerClass::DEBUG);
+
+ // Create the non-leaf and leaf cgroups for the container, enable
+ // controllers in the non-leaf cgroup, and `prepare` each of the controllers.
+ const string& nonLeafCgroup = cgroups2_paths::container(
+ flags.cgroups_root, containerId);
+ if (cgroups2::exists(nonLeafCgroup)) {
+ return Failure("Cgroup '" + nonLeafCgroup + "' already exists");
+ }
+
+ Try<Nothing> createNonLeafCgroup = cgroups2::create(nonLeafCgroup);
+ if (createNonLeafCgroup.isError()) {
+ return Failure("Failed to create cgroup '" + nonLeafCgroup + "': "
+ + createNonLeafCgroup.error());
+ }
+
+ const string& leafCgroup = cgroups2_paths::container(
+ flags.cgroups_root, containerId, true);
+ if (cgroups2::exists(leafCgroup)) {
+ return Failure("Cgroup '" + leafCgroup + "' already exists");
+ }
+
+ Try<Nothing> createLeafCgroup = cgroups2::create(leafCgroup);
+ if (createLeafCgroup.isError()) {
+ return Failure("Failed to create cgroup '" + leafCgroup + "': "
+ + createLeafCgroup.error());
+ }
+
+ LOG(INFO) << "Created cgroups '" << nonLeafCgroup << "' "
+ << "and '" << leafCgroup << "'";
+
+ infos[containerId] = Owned<Info>(new Info(containerId, nonLeafCgroup));
+ vector<Future<Nothing>> prepares;
+ foreachvalue (const Owned<Controller>& controller, controllers) {
+ if (controller->name() == "core") {
+ // The "core" controller is always enabled because the "cgroup.*" control
+ // files exist for all cgroups. Additionally, since "core" isn't a
+ // valid controller name (i.e. it doesn't exist in "cgroup.controllers"),
+ // calling `cgroups2::controllers::enable` with the "core" cgroup will
+ // fail with "Invalid argument".
+ //
+ // For that reason, we skip enabling the "core" controller here.
+ continue;
+ }
+
+ Try<Nothing> enableInNonLeaf =
+ cgroups2::controllers::enable(nonLeafCgroup, {controller->name()});
+ if (enableInNonLeaf.isError()) {
+ return Failure(
+ "Failed to enable controller '" + controller->name() + "' "
+ "in cgroup '" + nonLeafCgroup + "': " + enableInNonLeaf.error());
+ }
+
+ // We enable controllers in the leaf cgroup to allow the container process
+ // to manage their own cgroups, if they choose.
+ Try<Nothing> enableInLeaf =
+ cgroups2::controllers::enable(leafCgroup, {controller->name()});
+ if (enableInLeaf.isError()) {
+ return Failure(
+ "Failed to enable controllers in cgroup '" + nonLeafCgroup + "': "
+ + enableInLeaf.error());
+ }
+
+ infos[containerId]->controllers.insert(controller->name());
+ prepares.push_back(
+ controller->prepare(containerId, nonLeafCgroup, containerConfig));
+ }
+
+ return await(prepares)
+ .then(defer(
+ PID<Cgroups2IsolatorProcess>(this),
+ &Cgroups2IsolatorProcess::_prepare,
+ containerId,
+ containerConfig,
+ lambda::_1));
+}
+
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::_prepare(
+ const ContainerID& containerId,
+ const ContainerConfig& containerConfig,
+ const vector<Future<Nothing>>& futures)
+{
+ vector<string> errors;
+ foreach (const Future<Nothing>& future, futures) {
+ if (!future.isReady()) {
+ errors.push_back(future.isFailed() ? future.failure() : "discarded");
+ }
+ }
+
+ if (!errors.empty()) {
+ return Failure(
+ "Failed to prepare controllers: " + strings::join(": ", errors));
+ }
+
+ return update(
+ containerId,
+ containerConfig.resources(),
+ containerConfig.limits())
+ .then(defer(
+ PID<Cgroups2IsolatorProcess>(this),
+ &Cgroups2IsolatorProcess::__prepare,
+ containerId,
+ containerConfig));
+}
+
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::__prepare(
+ const ContainerID& containerId,
+ const ContainerConfig& containerConfig)
+{
+ // Only create cgroup mounts for containers with rootfs.
+ if (!containerConfig.has_rootfs()) {
+ return None();
+ }
+
+ Owned<Info> info = cgroupInfo(containerId);
+ if (!info.get()) {
+ return Failure(
+ "Failed to get cgroup for container "
+ "'" + stringify(containerId) + "'");
+ }
+
+ ContainerLaunchInfo launchInfo;
+ // Create a new cgroup namespace. The child process will only be able to
+ // see the cgroups that are in its cgroup subtree.
+ launchInfo.add_clone_namespaces(CLONE_NEWCGROUP);
+
+ // Create a new mount namespace and mount the root cgroup at /sys/fs/cgroup.
+ launchInfo.add_clone_namespaces(CLONE_NEWNS);
+ *launchInfo.add_mounts() = protobuf::slave::createContainerMount(
+ cgroups2::path(info->cgroup),
+ path::join(containerConfig.rootfs(), "/sys/fs/cgroup"),
+ MS_BIND | MS_REC);
Review Comment:
Is this the right way to mount?
* Are we supposed to pass a cgroup2 type rather than only doing paths here?
* We're mounting in the non-leaf, but it feels like we should be mounting in
the leaf if the process is going to create sub-cgroups and self manage,
otherwise they can create other container/foo cgroups, rather than only
container/leaf/foo cgroups.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]