[lxc-devel] [lxc/master] cgroup2: rework controller delegation

brauner on Github Fri, 06 Dec 2019 06:41:38 -0800

The following pull request was submitted through GitHub.
It can be accessed and reviewed at: https://github.com/lxc/lxc/pull/3215


This e-mail was sent by the LXC bot; direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>

From c581d2a6732fa91f57731d9217004f871e80a2de Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Fri, 6 Dec 2019 09:42:47 +0100
Subject: [PATCH] cgroup2: rework controller delegation

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 src/lxc/cgroups/cgfsng.c | 239 +++++++++++++++++++++------------------
 src/lxc/cgroups/cgroup.c |   1 +
 src/lxc/cgroups/cgroup.h |  12 +-
 src/lxc/lxccontainer.c   |  34 +++---
 src/lxc/macro.h          |   6 +
 src/lxc/start.c          |  40 +++++--
 src/lxc/start.h          |   3 +
 7 files changed, 194 insertions(+), 141 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 9541031828..d5ddc8388d 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1184,71 +1184,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct 
cgroup_ops *ops,
        }
 }
 
-static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
-{
-       __do_free char *add_controllers = NULL, *cgroup = NULL;
-       size_t i, parts_len;
-       char **it;
-       size_t full_len = 0;
-       char **parts = NULL;
-       bool bret = false;
-
-       if (h->version != CGROUP2_SUPER_MAGIC)
-               return true;
-
-       if (!h->controllers)
-               return true;
-
-       /* For now we simply enable all controllers that we have detected by
-        * creating a string like "+memory +pids +cpu +io".
-        * TODO: In the near future we might want to support "-<controller>"
-        * etc. but whether supporting semantics like this make sense will need
-        * some thinking.
-        */
-       for (it = h->controllers; it && *it; it++) {
-               full_len += strlen(*it) + 2;
-               add_controllers = must_realloc(add_controllers, full_len + 1);
-
-               if (h->controllers[0] == *it)
-                       add_controllers[0] = '\0';
-
-               (void)strlcat(add_controllers, "+", full_len + 1);
-               (void)strlcat(add_controllers, *it, full_len + 1);
-
-               if ((it + 1) && *(it + 1))
-                       (void)strlcat(add_controllers, " ", full_len + 1);
-       }
-
-       parts = lxc_string_split(cgname, '/');
-       if (!parts)
-               goto on_error;
-
-       parts_len = lxc_array_len((void **)parts);
-       if (parts_len > 0)
-               parts_len--;
-
-       cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
-       for (i = 0; i < parts_len; i++) {
-               int ret;
-               __do_free char *target = NULL;
-
-               cgroup = must_append_path(cgroup, parts[i], NULL);
-               target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
-               ret = lxc_write_to_file(target, add_controllers, full_len, 
false, 0666);
-               if (ret < 0) {
-                       SYSERROR("Could not enable \"%s\" controllers in the "
-                                "unified cgroup \"%s\"", add_controllers, 
cgroup);
-                       goto on_error;
-               }
-       }
-
-       bret = true;
-
-on_error:
-       lxc_free_array((void **)parts, free);
-       return bret;
-}
-
 static int mkdir_eexist_on_last(const char *dir, mode_t mode)
 {
        const char *tmp = dir;
@@ -1298,7 +1233,7 @@ static bool monitor_create_path_for_hierarchy(struct 
hierarchy *h, char *cgname)
                return false;
        }
 
-       return cg_unified_create_cgroup(h, cgname);
+       return true;
 }
 
 static bool container_create_path_for_hierarchy(struct hierarchy *h, char 
*cgname)
@@ -1317,7 +1252,7 @@ static bool container_create_path_for_hierarchy(struct 
hierarchy *h, char *cgnam
                return false;
        }
 
-       return cg_unified_create_cgroup(h, cgname);
+       return true;
 }
 
 static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool 
monitor)
@@ -1400,6 +1335,7 @@ __cgfsng_ops static inline bool 
cgfsng_monitor_create(struct cgroup_ops *ops,
                return false;
 
        INFO("The monitor process uses \"%s\" as cgroup", monitor_cgroup);
+       ops->monitor_cgroup = move_ptr(monitor_cgroup);
        return true;
 }
 
@@ -1479,47 +1415,66 @@ __cgfsng_ops static inline bool 
cgfsng_payload_create(struct cgroup_ops *ops,
        return true;
 }
 
-__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
-                                            bool monitor)
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
+                                             struct lxc_handler *handler)
 {
-       int len;
-       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+       int monitor_len, transient_len;
+       char monitor[INTTYPE_TO_STRLEN(pid_t)],
+           transient[INTTYPE_TO_STRLEN(pid_t)];
 
        if (!ops->hierarchies)
                return true;
 
-       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
-       if (len < 0 || (size_t)len >= sizeof(pidstr))
-               return false;
+       monitor_len = snprintf(monitor, sizeof(monitor), "%d", 
handler->monitor_pid);
+       if (handler->transient_pid > 0)
+               transient_len = snprintf(transient, sizeof(transient), "%d",
+                                        handler->transient_pid);
 
        for (int i = 0; ops->hierarchies[i]; i++) {
-               int ret;
                __do_free char *path = NULL;
+               int ret;
 
-               if (monitor)
-                       path = 
must_make_path(ops->hierarchies[i]->monitor_full_path,
-                                             "cgroup.procs", NULL);
-               else
-                       path = 
must_make_path(ops->hierarchies[i]->container_full_path,
-                                             "cgroup.procs", NULL);
-               ret = lxc_write_to_file(path, pidstr, len, false, 0666);
-               if (ret != 0) {
-                       SYSERROR("Failed to enter cgroup \"%s\"", path);
-                       return false;
-               }
+               path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+                                     "cgroup.procs", NULL);
+               ret = lxc_writeat(-1, path, monitor, monitor_len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter 
cgroup \"%s\"", path);
+
+                if (handler->transient_pid < 0)
+                       return true;
+
+               ret = lxc_writeat(-1, path, transient, transient_len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter 
cgroup \"%s\"", path);
        }
+       handler->transient_pid = -1;
 
        return true;
 }
 
-__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t 
pid)
+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
+                                             struct lxc_handler *handler)
 {
-       return __do_cgroup_enter(ops, pid, true);
-}
+       int len;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
 
-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
-{
-       return __do_cgroup_enter(ops, pid, false);
+       if (!ops->hierarchies)
+               return true;
+
+       len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+       for (int i = 0; ops->hierarchies[i]; i++) {
+               __do_free char *path = NULL;
+               int ret;
+
+               path = must_make_path(ops->hierarchies[i]->container_full_path,
+                                     "cgroup.procs", NULL);
+               ret = lxc_writeat(-1, path, pidstr, len);
+               if (ret != 0)
+                       return log_error_errno(false, errno, "Failed to enter 
cgroup \"%s\"", path);
+       }
+
+       return true;
 }
 
 static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
@@ -2625,11 +2580,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, 
const char *filename,
        return ret;
 }
 
-static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
-                                    struct lxc_list *cgroup_settings,
-                                    bool do_devices)
+__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+                                                   struct lxc_conf *conf,
+                                                   bool do_devices)
 {
        __do_free struct lxc_list *sorted_cgroup_settings = NULL;
+       struct lxc_list *cgroup_settings = &conf->cgroup;
        struct lxc_list *iterator, *next;
        struct lxc_cgroup *cg;
        bool ret = false;
@@ -2699,12 +2655,13 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops 
*ops,
        return 0;
 }
 
-static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
-                                     struct lxc_list *cgroup_settings,
-                                     struct lxc_conf *conf)
+__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
+                                            struct lxc_handler *handler)
 {
        struct lxc_list *iterator;
        struct hierarchy *h = ops->unified;
+       struct lxc_conf *conf = handler->conf;
+       struct lxc_list *cgroup_settings = &conf->cgroup2;
 
        if (lxc_list_empty(cgroup_settings))
                return true;
@@ -2798,18 +2755,79 @@ __cgfsng_ops bool cgfsng_devices_activate(struct 
cgroup_ops *ops,
        return true;
 }
 
-__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
-                                            struct lxc_conf *conf,
-                                            bool do_devices)
+bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
 {
-       if (!__cg_legacy_setup_limits(ops, &conf->cgroup, do_devices))
-               return false;
+       __do_free char *add_controllers = NULL, *base_path = NULL;
+       struct hierarchy *unified = ops->unified;
+       ssize_t parts_len;
+       char **it;
+       size_t full_len = 0;
+       char **parts = NULL;
+       bool bret = false;
 
-       /* for v2 we will have already set up devices */
-       if (do_devices)
+       if (!ops->hierarchies || !pure_unified_layout(ops) ||
+           !unified->controllers[0])
                return true;
 
-       return __cg_unified_setup_limits(ops, &conf->cgroup2, conf);
+       /* For now we simply enable all controllers that we have detected by
+        * creating a string like "+memory +pids +cpu +io".
+        * TODO: In the near future we might want to support "-<controller>"
+        * etc. but whether supporting semantics like this make sense will need
+        * some thinking.
+        */
+       for (it = unified->controllers; it && *it; it++) {
+               full_len += strlen(*it) + 2;
+               add_controllers = must_realloc(add_controllers, full_len + 1);
+
+               if (unified->controllers[0] == *it)
+                       add_controllers[0] = '\0';
+
+               (void)strlcat(add_controllers, "+", full_len + 1);
+               (void)strlcat(add_controllers, *it, full_len + 1);
+
+               if ((it + 1) && *(it + 1))
+                       (void)strlcat(add_controllers, " ", full_len + 1);
+       }
+
+       parts = lxc_string_split(cgroup, '/');
+       if (!parts)
+               goto on_error;
+
+       parts_len = lxc_array_len((void **)parts);
+       if (parts_len > 0)
+               parts_len--;
+
+       base_path = must_make_path(unified->mountpoint, 
unified->container_base_path, NULL);
+       for (ssize_t i = -1; i < parts_len; i++) {
+               int ret;
+               __do_free char *target = NULL;
+
+               if (i >= 0)
+                       base_path = must_append_path(base_path, parts[i], NULL);
+               target = must_make_path(base_path, "cgroup.subtree_control", 
NULL);
+               ret = lxc_writeat(-1, target, add_controllers, full_len);
+               if (ret < 0) {
+                       SYSERROR("Could not enable \"%s\" controllers in the 
unified cgroup \"%s\"", add_controllers, target);
+                       goto on_error;
+               }
+               TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", 
add_controllers, target);
+       }
+
+       bret = true;
+
+on_error:
+       lxc_free_array((void **)parts, free);
+       return bret;
+}
+
+__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+       return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
+}
+
+__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
+{
+       return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
 }
 
 static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
@@ -3062,15 +3080,15 @@ static int cg_unified_init(struct cgroup_ops *ops, bool 
relative,
        base_cgroup = cg_unified_get_current_cgroup(relative);
        if (!base_cgroup)
                return -EINVAL;
-       prune_init_scope(base_cgroup);
+       if (!relative)
+               prune_init_scope(base_cgroup);
 
        /* We assume that we have already been given controllers to delegate
         * further down the hierarchy. If not it is up to the user to delegate
         * them to us.
         */
        mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
-       subtree_path = must_make_path(mountpoint, base_cgroup,
-                                     "cgroup.subtree_control", NULL);
+       subtree_path = must_make_path(mountpoint, base_cgroup, 
"cgroup.controllers", NULL);
        delegatable = cg_unified_get_controllers(subtree_path);
        if (!delegatable)
                delegatable = cg_unified_make_empty_controller();
@@ -3162,6 +3180,8 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
        cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
        cgfsng_ops->monitor_create = cgfsng_monitor_create;
        cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
+       cgfsng_ops->monitor_delegate_controllers = 
cgfsng_monitor_delegate_controllers;
+       cgfsng_ops->payload_delegate_controllers = 
cgfsng_payload_delegate_controllers;
        cgfsng_ops->payload_create = cgfsng_payload_create;
        cgfsng_ops->payload_enter = cgfsng_payload_enter;
        cgfsng_ops->escape = cgfsng_escape;
@@ -3172,6 +3192,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
        cgfsng_ops->set = cgfsng_set;
        cgfsng_ops->freeze = cgfsng_freeze;
        cgfsng_ops->unfreeze = cgfsng_unfreeze;
+       cgfsng_ops->setup_limits_legacy = cgfsng_setup_limits_legacy;
        cgfsng_ops->setup_limits = cgfsng_setup_limits;
        cgfsng_ops->driver = "cgfsng";
        cgfsng_ops->version = "1.0.0";
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 35e4b5ae4e..8804d59ac3 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -65,6 +65,7 @@ void cgroup_exit(struct cgroup_ops *ops)
 
        free(ops->cgroup_pattern);
        free(ops->container_cgroup);
+       free(ops->monitor_cgroup);
 
        if (ops->cgroup2_devices)
                bpf_program_free(ops->cgroup2_devices);
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 81320e4876..80d2c315a3 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -88,6 +88,7 @@ struct cgroup_ops {
        char **cgroup_use;
        char *cgroup_pattern;
        char *container_cgroup;
+       char *monitor_cgroup;
 
        /* Static memory, do not free.*/
        const char *monitor_pattern;
@@ -135,9 +136,9 @@ struct cgroup_ops {
        void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
        void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
        bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
-       bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
+       bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
        bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
-       bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
+       bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
        const char *(*get_cgroup)(struct cgroup_ops *ops, const char 
*controller);
        bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
        int (*num_hierarchies)(struct cgroup_ops *ops);
@@ -148,8 +149,9 @@ struct cgroup_ops {
                   size_t len, const char *name, const char *lxcpath);
        int (*freeze)(struct cgroup_ops *ops, int timeout);
        int (*unfreeze)(struct cgroup_ops *ops, int timeout);
-       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
-                            bool with_devices);
+       bool (*setup_limits_legacy)(struct cgroup_ops *ops,
+                                   struct lxc_conf *conf, bool with_devices);
+       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_handler 
*handler);
        bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
        bool (*attach)(struct cgroup_ops *ops, const char *name,
                       const char *lxcpath, pid_t pid);
@@ -158,6 +160,8 @@ struct cgroup_ops {
        int (*nrtasks)(struct cgroup_ops *ops);
        bool (*devices_activate)(struct cgroup_ops *ops,
                                 struct lxc_handler *handler);
+       bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
+       bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
 };
 
 extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index e89caf4e15..b97b58ec6a 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -824,6 +824,15 @@ static bool wait_on_daemonized_start(struct lxc_handler 
*handler, int pid)
 {
        int ret, state;
 
+       /* The first child is going to fork() again and then exits. So we reap
+        * the first child here.
+        */
+       ret = wait_for_pid(pid);
+       if (ret < 0)
+               DEBUG("Failed waiting on first child %d", pid);
+       else
+               DEBUG("First child %d exited", pid);
+
        /* Close write end of the socket pair. */
        close(handler->state_socket_pair[1]);
        handler->state_socket_pair[1] = -1;
@@ -834,15 +843,6 @@ static bool wait_on_daemonized_start(struct lxc_handler 
*handler, int pid)
        close(handler->state_socket_pair[0]);
        handler->state_socket_pair[0] = -1;
 
-       /* The first child is going to fork() again and then exits. So we reap
-        * the first child here.
-        */
-       ret = wait_for_pid(pid);
-       if (ret < 0)
-               DEBUG("Failed waiting on first child %d", pid);
-       else
-               DEBUG("First child %d exited", pid);
-
        if (state < 0) {
                SYSERROR("Failed to receive the container state");
                return false;
@@ -935,17 +935,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int 
useinit, char * const a
        if (c->daemonize) {
                bool started;
                char title[2048];
-               pid_t pid;
+               pid_t pid_first, pid_second;
 
-               pid = fork();
-               if (pid < 0) {
+               pid_first = fork();
+               if (pid_first < 0) {
                        free_init_cmd(init_cmd);
                        lxc_free_handler(handler);
                        return false;
                }
 
                /* first parent */
-               if (pid != 0) {
+               if (pid_first != 0) {
                        /* Set to NULL because we don't want father unlink
                         * the PID file, child will do the free and unlink.
                         */
@@ -954,7 +954,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int 
useinit, char * const a
                        /* Wait for container to tell us whether it started
                         * successfully.
                         */
-                       started = wait_on_daemonized_start(handler, pid);
+                       started = wait_on_daemonized_start(handler, pid_first);
 
                        free_init_cmd(init_cmd);
                        lxc_free_handler(handler);
@@ -980,14 +980,14 @@ static bool do_lxcapi_start(struct lxc_container *c, int 
useinit, char * const a
                 * POSIX's daemon() function we change to "/" and redirect
                 * std{in,out,err} to /dev/null.
                 */
-               pid = fork();
-               if (pid < 0) {
+               pid_second = fork();
+               if (pid_second < 0) {
                        SYSERROR("Failed to fork first child process");
                        _exit(EXIT_FAILURE);
                }
 
                /* second parent */
-               if (pid != 0) {
+               if (pid_second != 0) {
                        free_init_cmd(init_cmd);
                        lxc_free_handler(handler);
                        _exit(EXIT_SUCCESS);
diff --git a/src/lxc/macro.h b/src/lxc/macro.h
index 2aeda4e3da..e011596d21 100644
--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -448,6 +448,12 @@ enum {
                -1;                    \
        })
 
+#define ret_set_errno(__ret__, __errno__) \
+       ({                                \
+               errno = __errno__;        \
+               __ret__;                  \
+       })
+
 #define free_replace_move_ptr(a, b) \
        ({                          \
                free(a);            \
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 6e2f0ab046..aa4939945d 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -737,6 +737,10 @@ struct lxc_handler *lxc_init_handler(const char *name, 
struct lxc_conf *conf,
                handler->nsfd[i] = -1;
 
        handler->name = name;
+       if (daemonize)
+               handler->transient_pid = lxc_raw_getpid();
+       else
+               handler->transient_pid = -1;
 
        if (daemonize && handler->conf->reboot == REBOOT_NONE) {
                /* Create socketpair() to synchronize on daemonized startup.
@@ -912,7 +916,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
        ret = lsm_process_prepare(conf, handler->lxcpath);
        if (ret < 0) {
                ERROR("Failed to initialize LSM");
-               goto out_destroy_cgroups;
+               goto out_delete_terminal;
        }
        TRACE("Initialized LSM");
 
@@ -920,10 +924,6 @@ int lxc_init(const char *name, struct lxc_handler *handler)
        handler->monitor_status_fd = move_fd(status_fd);
        return 0;
 
-out_destroy_cgroups:
-       handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
-       handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
-
 out_delete_terminal:
        lxc_terminal_delete(&handler->conf->console);
 
@@ -1016,8 +1016,10 @@ void lxc_fini(const char *name, struct lxc_handler 
*handler)
 
        lsm_process_cleanup(handler->conf, handler->lxcpath);
 
-       cgroup_ops->payload_destroy(cgroup_ops, handler);
-       cgroup_ops->monitor_destroy(cgroup_ops, handler);
+       if (cgroup_ops) {
+               cgroup_ops->payload_destroy(cgroup_ops, handler);
+               cgroup_ops->monitor_destroy(cgroup_ops, handler);
+       }
 
        if (handler->conf->reboot == REBOOT_NONE) {
                /* For all new state clients simply close the command socket.
@@ -1813,14 +1815,24 @@ static int lxc_spawn(struct lxc_handler *handler)
        if (ret < 0)
                goto out_delete_net;
 
-       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
+       if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) 
{
                ERROR("Failed to setup cgroup limits for container \"%s\"", 
name);
                goto out_delete_net;
        }
 
-       if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
+       if (!cgroup_ops->payload_enter(cgroup_ops, handler))
                goto out_delete_net;
 
+       if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
+               ERROR("Failed to delegate controllers to payload cgroup");
+               goto out_delete_net;
+       }
+
+       if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
+               ERROR("Failed to setup cgroup limits for container \"%s\"", 
name);
+               goto out_delete_net;
+       }
+
        if (!cgroup_ops->chown(cgroup_ops, handler->conf))
                goto out_delete_net;
 
@@ -1883,7 +1895,7 @@ static int lxc_spawn(struct lxc_handler *handler)
        if (ret < 0)
                goto out_delete_net;
 
-       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
+       if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
                ERROR("Failed to setup legacy device cgroup controller limits");
                goto out_delete_net;
        }
@@ -2015,12 +2027,18 @@ int __lxc_start(const char *name, struct lxc_handler 
*handler,
                goto out_fini_nonet;
        }
 
-       if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+       if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
                ERROR("Failed to enter monitor cgroup");
                ret = -1;
                goto out_fini_nonet;
        }
 
+       if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
+               ERROR("Failed to delegate controllers to monitor cgroup");
+               ret = -1;
+               goto out_fini_nonet;
+       }
+
        if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
                /* If the backing store is a device, mount it here and now. */
                if (rootfs_is_blockdev(conf)) {
diff --git a/src/lxc/start.h b/src/lxc/start.h
index dc40f29eeb..662ac55704 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -89,6 +89,9 @@ struct lxc_handler {
         */
        int proc_pidfd;
 
+       /* The grandfather's pid when double-forking. */
+       pid_t transient_pid;
+
        /* The monitor's pid. */
        pid_t monitor_pid;

_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

[lxc-devel] [lxc/master] cgroup2: rework controller delegation

Reply via email to