DevinLeamy commented on code in PR #556:
URL: https://github.com/apache/mesos/pull/556#discussion_r1569570036


##########
src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp:
##########
@@ -120,6 +144,569 @@ bool Cgroups2IsolatorProcess::supportsStandalone()
   return true;
 }
 
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ContainerConfig& containerConfig)
+{
+  if (containerId.has_parent()) {
+    return Failure("cgroups v2 does not support nested containers");
+  }
+
+  if (infos.contains(containerId)) {
+    return Failure(
+        "Container with id '" + stringify(containerId) + "' "
+        "has already been prepared");
+  }
+
+  CHECK(containerConfig.container_class() != ContainerClass::DEBUG);
+
+  // Create the non-leaf and leaf cgroups for the container, enable
+  // controllers in the non-leaf cgroup, and `prepare` each of the controllers.
+  const string& cgroup = cgroups2_paths::container(
+      flags.cgroups_root, containerId);
+  if (cgroups2::exists(cgroup)) {
+    return Failure("Cgroup '" + cgroup + "' already exists");
+  }
+
+  Try<Nothing> createCgroup = cgroups2::create(cgroup);
+  if (createCgroup.isError()) {
+    return Failure("Failed to create cgroup '" + cgroup + "': "
+                   + createCgroup.error());
+  }
+
+  const string& leafCgroup = cgroups2_paths::container(
+      flags.cgroups_root, containerId, true);
+  if (cgroups2::exists(leafCgroup)) {
+    return Failure("Cgroup '" + leafCgroup + "' already exists");
+  }
+
+  Try<Nothing> createLeafCgroup = cgroups2::create(leafCgroup);
+  if (createLeafCgroup.isError()) {
+    return Failure("Failed to create cgroup '" + leafCgroup + "': "
+                   + createLeafCgroup.error());
+  }
+
+  LOG(INFO) << "Created cgroups '" << cgroup << "' and '" << leafCgroup << "'";
+
+  infos[containerId] = Owned<Info>(new Info(containerId, cgroup));
+  vector<Future<Nothing>> prepares;
+  foreachvalue (const Owned<Controller>& controller, controllers) {
+    // Enable controllers in the non-leaf cgroup.
+    Try<Nothing> enable =
+      cgroups2::controllers::enable(cgroup, {controller->name()});
+
+    if (enable.isError()) {
+      return Failure(
+          "Failed to enable controller '" + controller->name() + "'"
+          " in cgroup '" + cgroup + "': " + enable.error());
+    }
+
+    infos[containerId]->controllers.insert(controller->name());
+    prepares.push_back(
+        controller->prepare(containerId, cgroup, containerConfig));
+  }
+
+  return await(prepares)
+    .then(defer(
+        PID<Cgroups2IsolatorProcess>(this),
+        &Cgroups2IsolatorProcess::_prepare,
+        containerId,
+        containerConfig,
+        lambda::_1));
+}
+
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::_prepare(
+    const ContainerID& containerId,
+    const ContainerConfig& containerConfig,
+    const vector<Future<Nothing>>& futures)
+{
+  vector<string> errors;
+  foreach (const Future<Nothing>& future, futures) {
+    if (!future.isReady()) {
+      errors.push_back(future.isFailed() ? future.failure() : "discarded");
+    }
+  }
+
+  if (!errors.empty()) {
+    return Failure(
+        "Failed to prepare controllers: " + strings::join(": ", errors));
+  }
+
+  return update(
+      containerId,
+      containerConfig.resources(),
+      containerConfig.limits())
+    .then(defer(
+        PID<Cgroups2IsolatorProcess>(this),
+        &Cgroups2IsolatorProcess::__prepare,
+        containerId,
+        containerConfig));
+}
+
+
+Future<Option<ContainerLaunchInfo>> Cgroups2IsolatorProcess::__prepare(
+    const ContainerID& containerId,
+    const ContainerConfig& containerConfig)
+{
+  // Only create cgroup mounts for containers with rootfs.
+  if (!containerConfig.has_rootfs()) {
+    return None();
+  }
+
+  Owned<Info> info = cgroupInfo(containerId);
+  if (!info.get()) {
+    return Failure(
+        "Failed to get cgroup for container "
+        "'" + stringify(containerId) + "'");
+  }
+
+  ContainerLaunchInfo launchInfo;
+  // Create a new cgroup namespace. The child process will only be able to
+  // see the cgroups that are in its cgroup subtree.
+  launchInfo.add_clone_namespaces(CLONE_NEWCGROUP);
+
+  // Create a new mount namespace and mount the root cgroup at /sys/fs/cgroup.
+  launchInfo.add_clone_namespaces(CLONE_NEWNS);
+  *launchInfo.add_mounts() = protobuf::slave::createContainerMount(
+      cgroups2::path(info->cgroup),
+      path::join(containerConfig.rootfs(), "/sys/fs/cgroup"),
+      MS_BIND | MS_REC);
+
+  return launchInfo;
+}
+
+
+Future<Nothing> Cgroups2IsolatorProcess::recover(
+    const vector<ContainerState>& states,
+    const hashset<ContainerID>& orphans)
+{
+  vector<Future<Nothing>> recovers;
+  foreach (const ContainerState& state, states) {
+    recovers.push_back(___recover(state.container_id()));
+  }
+
+  return await(recovers)
+    .then(defer(
+        PID<Cgroups2IsolatorProcess>(this),
+        &Cgroups2IsolatorProcess::_recover,
+        orphans,
+        lambda::_1));
+}
+
+
+Future<Nothing> Cgroups2IsolatorProcess::_recover(
+  const hashset<ContainerID>& orphans,
+  const vector<Future<Nothing>>& futures)
+{
+  vector<string> errors;
+  foreach (const Future<Nothing>& future, futures) {
+    if (!future.isReady()) {
+      errors.push_back(future.isFailed() ? future.failure() : "discarded");
+    }
+  }
+
+  if (!errors.empty()) {
+    return Failure("Failed to recover active containers: " +
+                   strings::join(": ", errors));
+  }
+
+  hashset<ContainerID> knownOrphans;
+  hashset<ContainerID> unknownOrphans;
+
+  Try<set<string>> cgroups = cgroups2::get(flags.cgroups_root);
+  if (cgroups.isError()) {
+    return Failure("Failed to get cgroups under '" + flags.cgroups_root + "': "
+                   + cgroups.error());
+  }
+
+  foreach (const string& cgroup, *cgroups) {
+    if (cgroup == cgroups2_paths::agent(flags.cgroups_root)) {
+      continue;
+    }
+
+    Option<ContainerID> containerId = cgroups2_paths::containerId(
+        flags.cgroups_root, cgroup);
+    if (containerId.isNone()) {
+        LOG(INFO) << "Cgroup '" << cgroup << "' does not correspond to a "
+                  << "container id and will not be recovered";
+        continue;
+    }
+
+    if (infos.contains(*containerId)) {
+      // Container has already been recovered.
+      continue;
+    }
+
+    orphans.contains(*containerId) ?
+        knownOrphans.insert(*containerId) :
+        unknownOrphans.insert(*containerId);
+  }
+
+  vector<Future<Nothing>> recovers;
+  foreach (const ContainerID& containerId, knownOrphans) {
+    recovers.push_back(___recover(containerId));
+  }
+
+  foreach (const ContainerID& containerId, unknownOrphans) {
+    recovers.push_back(___recover(containerId));
+  }
+
+  return await(recovers)
+    .then(defer(
+        PID<Cgroups2IsolatorProcess>(this),
+        &Cgroups2IsolatorProcess::__recover,
+        unknownOrphans,
+        lambda::_1));
+}
+
+
+Future<Nothing> Cgroups2IsolatorProcess::__recover(
+    const hashset<ContainerID>& unknownOrphans,
+    const vector<Future<Nothing>>& futures)
+{
+  vector<string> errors;
+  foreach (const Future<Nothing>& future, futures) {
+    if (!future.isReady()) {
+      errors.push_back(future.isFailed() ? future.failure() : "discarded");
+    }
+  }
+  if (!errors.empty()) {
+    return Failure(
+        "Failed to recover orphan containers: " +
+        strings::join(": ", errors));
+  }
+
+  // Known orphan cgroups will be destroyed by the containerizer using
+  // the normal cleanup path.
+  foreach (const ContainerID& containerId, unknownOrphans) {
+    LOG(INFO) << "Cleaning up unknown orphaned container " << containerId;
+    cleanup(containerId);
+  }
+
+  return Nothing();
+}
+
+
+Future<Nothing> Cgroups2IsolatorProcess::___recover(
+    const ContainerID& containerId)
+{
+  // Check that:
+  // 1. The non-leaf cgroup exists                    => Fail if not found.
+  // 2. The leaf cgroup exists                        => Fail if not found.
+  // 3. Each of the requested controllers is enabled  => Log and continue.
+  //
+  // Failure modes:
+  // 1. Container fails during launch.
+  //    This can happen if the launcher fails to `fork`, if 'this' isolator
+  //    fails to `prepare` or `isolate`, or for other reasons. Cgroups may be
+  //    improperly configured, meaning there may be missing cgroups or cgroup
+  //    control files with the wrong values.
+  // 2. Container fails on destroy.
+  //    The container fails to be destroyed so cgroups may not have been
+  //    correctly cleaned up. This can result in orphan cgroups.
+  // 3. Mesos agent is restarted with different flags.
+  //    If the agent is started with new isolators, the cgroups of existing
+  //    containers from a previous run won't have all of the requested
+  //    controllers enabled.

Review Comment:
   Provide more justification for why it is okay to bail here when one or more
cgroups are missing.
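
   For example, recovery could distinguish "cgroup is missing because the
container never finished launching" from "cgroup is missing because destroy
was interrupted". Below is only a rough sketch of the checks described in the
comment block, reusing the `cgroups2::exists` and `cgroups2_paths::container`
helpers from this diff; `enabledControllers()` is hypothetical and just stands
in for however the enabled controllers are read back:

       // Sketch only: bail if either cgroup for the container is gone.
       const string& cgroup = cgroups2_paths::container(
           flags.cgroups_root, containerId);
       const string& leafCgroup = cgroups2_paths::container(
           flags.cgroups_root, containerId, true);

       if (!cgroups2::exists(cgroup) || !cgroups2::exists(leafCgroup)) {
         return Failure(
             "Cgroup(s) for container '" + stringify(containerId) + "'"
             " do not exist");
       }

       // Missing controllers are only logged, so recovery proceeds with a
       // partially configured cgroup.
       Try<set<string>> enabled = enabledControllers(cgroup); // hypothetical
       foreachvalue (const Owned<Controller>& controller, controllers) {
         if (enabled.isSome() && enabled->count(controller->name()) == 0) {
           LOG(WARNING) << "Controller '" << controller->name() << "'"
                        << " is not enabled in cgroup '" << cgroup << "'";
         }
       }

   In particular, is it safe to fail recovery outright in failure modes (1) and
(2) above, or should those cgroups be cleaned up or recreated instead?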



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
