This is done so that each container can set its own release agent.
Release agent information is now stored in a per-cgroup-root data
structure in the ve.

https://jira.sw.ru/browse/PSBM-83887

Signed-off-by: Valeriy Vdovin <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]>
---
 include/linux/cgroup.h |   5 +-
 include/linux/ve.h     |   7 +++
 kernel/cgroup.c        | 133 ++++++++++++++++++++++++++++++++++++++++---------
 kernel/ve/ve.c         |  69 ++++++++++++++++++++++++-
 4 files changed, 186 insertions(+), 28 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d0ce3cc..911dd48 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -394,9 +394,6 @@ struct cgroupfs_root {
        /* IDs for cgroups in this hierarchy */
        struct ida cgroup_ida;
 
-       /* The path to use for release notifications. */
-       char release_agent_path[PATH_MAX];
-
        /* The name for this hierarchy - may be empty */
        char name[MAX_CGROUP_ROOT_NAMELEN];
 };
@@ -639,7 +636,7 @@ int cgroup_task_count(const struct cgroup *cgrp);
 void cgroup_release_agent(struct work_struct *work);
 
 #ifdef CONFIG_VE
-void cgroup_mark_ve_roots(struct ve_struct *ve);
+int cgroup_mark_ve_roots(struct ve_struct *ve);
 void cgroup_unmark_ve_roots(struct ve_struct *ve);
 struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
 #endif
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 94a07df..da719b4 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -214,6 +214,13 @@ void do_update_load_avg_ve(void);
 
 void ve_add_to_release_list(struct cgroup *cgrp);
 void ve_rm_from_release_list(struct cgroup *cgrp);
+
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
+       const char *release_agent);
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+       struct cgroup *cgrp_root);
+
 extern struct ve_struct *get_ve(struct ve_struct *ve);
 extern void put_ve(struct ve_struct *ve);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fa1a881..9b7c8f3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1094,10 +1094,21 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 {
+       const char *release_agent;
        struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
        struct cgroup_subsys *ss;
+       struct cgroup *root_cgrp = &root->top_cgroup;
 
+#ifdef CONFIG_VE
+       struct ve_struct *ve = get_exec_env();
+       if (!ve_is_super(ve)) {
+               mutex_lock(&cgroup_mutex);
+               root_cgrp = task_cgroup_from_root(ve->init_task, root);
+               mutex_unlock(&cgroup_mutex);
+       }
+#endif
        mutex_lock(&cgroup_root_mutex);
+
        for_each_subsys(root, ss)
                seq_printf(seq, ",%s", ss->name);
        if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
@@ -1108,9 +1119,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",xattr");
        if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
                seq_puts(seq, ",cpuset_v2_mode");
-       if (strlen(root->release_agent_path))
-               seq_show_option(seq, "release_agent",
-                               root->release_agent_path);
+       rcu_read_lock();
+       release_agent = ve_get_release_agent_path(root_cgrp->ve_owner,
+               root_cgrp);
+       if (release_agent && release_agent[0])
+               seq_show_option(seq, "release_agent", release_agent);
+       rcu_read_unlock();
        if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
                seq_puts(seq, ",clone_children");
        if (strlen(root->name))
@@ -1388,8 +1402,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
        /* re-populate subsystem files */
        cgroup_populate_dir(cgrp, false, added_mask);
 
-       if (opts.release_agent)
-               strcpy(root->release_agent_path, opts.release_agent);
+       if (opts.release_agent) {
+               struct cgroup *root_cgrp;
+               root_cgrp = cgroup_get_local_root(cgrp);
+               if (root_cgrp->ve_owner)
+                       ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+                               root_cgrp, opts.release_agent);
+       }
  out_unlock:
        kfree(opts.release_agent);
        kfree(opts.name);
@@ -1550,8 +1569,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
        root->subsys_mask = opts->subsys_mask;
        root->flags = opts->flags;
        ida_init(&root->cgroup_ida);
-       if (opts->release_agent)
-               strcpy(root->release_agent_path, opts->release_agent);
        if (opts->name)
                strcpy(root->name, opts->name);
        if (opts->cpuset_clone_children)
@@ -2317,27 +2334,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
 static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
                                      const char *buffer)
 {
-       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-
+       int ret = 0;
+       struct cgroup *root_cgrp;
        if (strlen(buffer) >= PATH_MAX)
                return -EINVAL;
 
        if (!cgroup_lock_live_group(cgrp))
                return -ENODEV;
 
-       mutex_lock(&cgroup_root_mutex);
-       strcpy(cgrp->root->release_agent_path, buffer);
-       mutex_unlock(&cgroup_root_mutex);
+       root_cgrp = cgroup_get_local_root(cgrp);
+       BUG_ON(!root_cgrp);
+       if (root_cgrp->ve_owner) {
+               ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+                       root_cgrp, buffer);
+       }
+
        mutex_unlock(&cgroup_mutex);
-       return 0;
+       return ret;
 }
 
 static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
                                     struct seq_file *seq)
 {
+       const char *release_agent;
+       struct cgroup *root_cgrp;
+
        if (!cgroup_lock_live_group(cgrp))
                return -ENODEV;
-       seq_puts(seq, cgrp->root->release_agent_path);
+
+       root_cgrp = cgroup_get_local_root(cgrp);
+       if (root_cgrp->ve_owner) {
+               rcu_read_lock();
+               release_agent = ve_get_release_agent_path(
+                       root_cgrp->ve_owner, root_cgrp);
+
+               if (release_agent)
+                       seq_puts(seq, release_agent);
+               rcu_read_unlock();
+       }
        seq_putc(seq, '\n');
        mutex_unlock(&cgroup_mutex);
        return 0;
@@ -4153,7 +4187,7 @@ static struct cftype files[] = {
        },
        {
                .name = "release_agent",
-               .flags = CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
                .read_seq_string = cgroup_release_agent_show,
                .write_string = cgroup_release_agent_write,
                .max_write_len = PATH_MAX,
@@ -4282,22 +4316,60 @@ static int subgroups_count(struct cgroup *cgroup)
        return cgrps_count;
 }
 
+static struct cftype *get_cftype_by_name(const char *name)
+{
+       struct cftype *cft;
+       for (cft = files; cft->name[0] != '\0'; cft++) {
+               if (!strcmp(cft->name, name))
+                       return cft;
+       }
+       return NULL;
+}
+
 #ifdef CONFIG_VE
-void cgroup_mark_ve_roots(struct ve_struct *ve)
+int cgroup_mark_ve_roots(struct ve_struct *ve)
 {
-       struct cgroup *cgrp;
+       struct cgroup *cgrp, *tmp;
        struct cgroupfs_root *root;
+       int err = 0;
+       struct cftype *cft;
+       LIST_HEAD(pending);
+
+       cft = get_cftype_by_name("release_agent");
+       BUG_ON(!cft);
 
+       mutex_lock(&cgroup_cft_mutex);
        mutex_lock(&cgroup_mutex);
        for_each_active_root(root) {
                cgrp = task_cgroup_from_root(ve->init_task, root);
-               cgrp->ve_owner = ve;
+               rcu_assign_pointer(cgrp->ve_owner, ve);
                set_bit(CGRP_VE_ROOT, &cgrp->flags);
-
+               dget(cgrp->dentry);
+               list_add_tail(&cgrp->cft_q_node, &pending);
                if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
                        link_ve_root_cpu_cgroup(cgrp);
        }
        mutex_unlock(&cgroup_mutex);
+       list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
+               struct inode *inode = cgrp->dentry->d_inode;
+
+               if (err) {
+                       dput(cgrp->dentry);
+                       continue;
+               }
+
+               mutex_lock(&inode->i_mutex);
+               mutex_lock(&cgroup_mutex);
+               if (!cgroup_is_removed(cgrp))
+                       err = cgroup_add_file(cgrp, NULL, cft);
+               mutex_unlock(&cgroup_mutex);
+               mutex_unlock(&inode->i_mutex);
+
+               list_del_init(&cgrp->cft_q_node);
+               dput(cgrp->dentry);
+       }
+       mutex_unlock(&cgroup_cft_mutex);
+       return err;
 }
 
 void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -5432,13 +5504,22 @@ static void check_for_release(struct cgroup *cgrp)
 void cgroup_release_agent(struct work_struct *work)
 {
        struct ve_struct *ve;
+       char *agentbuf;
+
+       agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
+       if (!agentbuf) {
+               pr_warn("failed to allocate agentbuf\n");
+               return;
+       }
+
        ve = container_of(work, struct ve_struct, release_agent_work);
        mutex_lock(&cgroup_mutex);
        raw_spin_lock(&ve->release_list_lock);
        while (!list_empty(&ve->release_list)) {
                char *argv[3], *envp[3];
                int i, err;
-               char *pathbuf = NULL, *agentbuf = NULL;
+               const char *release_agent;
+               char *pathbuf = NULL;
                struct cgroup *cgrp, *root_cgrp;
 
                cgrp = list_entry(ve->release_list.next,
@@ -5458,9 +5539,15 @@ void cgroup_release_agent(struct work_struct *work)
                        rcu_read_unlock();
                        goto continue_free;
                }
+
+               release_agent = ve_get_release_agent_path(ve, root_cgrp);
+
+               *agentbuf = 0;
+               if (release_agent)
+                       strcpy(agentbuf, release_agent);
                rcu_read_unlock();
-               agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-               if (!agentbuf)
+
+               if (!*agentbuf)
                        goto continue_free;
 
                i = 0;
@@ -5489,11 +5576,11 @@ void cgroup_release_agent(struct work_struct *work)
                mutex_lock(&cgroup_mutex);
  continue_free:
                kfree(pathbuf);
-               kfree(agentbuf);
                raw_spin_lock(&ve->release_list_lock);
        }
        raw_spin_unlock(&ve->release_list_lock);
        mutex_unlock(&cgroup_mutex);
+       kfree(agentbuf);
 }
 
 static int __init cgroup_disable(char *str)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index bee3696..12f7497 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -51,6 +51,11 @@ struct per_cgroot_data {
         * data is related to this cgroup
         */
        struct cgroup *cgroot;
+       /*
+        * path to release agent binary, that should
+        * be spawned for all cgroups under this cgroup root
+        */
+       struct cgroup_rcu_string __rcu *release_agent_path;
 };
 
 extern struct kmapset_set sysfs_ve_perms_set;
@@ -175,6 +180,59 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
        return data;
 }
 
+int ve_set_release_agent_path(struct ve_struct *ve,
+       struct cgroup *cgroot, const char *release_agent)
+{
+       struct per_cgroot_data *data;
+       struct cgroup_rcu_string *new_path, *old_path;
+       int err = 0;
+
+       new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
+       if (IS_ERR(new_path))
+               return PTR_ERR(new_path);
+
+       data = per_cgroot_get_or_create(ve, cgroot);
+       if (IS_ERR(data)) {
+               kfree(new_path);
+               return PTR_ERR(data);
+       }
+
+       raw_spin_lock(&ve->per_cgroot_list_lock);
+
+       old_path = rcu_dereference_protected(data->release_agent_path,
+               lockdep_is_held(&ve->per_cgroot_list_lock));
+
+       rcu_assign_pointer(data->release_agent_path, new_path);
+       raw_spin_unlock(&ve->per_cgroot_list_lock);
+
+       if (old_path)
+               kfree_rcu(old_path, rcu_head);
+
+       return err;
+}
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+       struct cgroup *cgroot)
+{
+       /* caller must grab rcu_read_lock */
+       const char *result = NULL;
+       struct per_cgroot_data *data;
+       struct cgroup_rcu_string *str;
+
+       raw_spin_lock(&ve->per_cgroot_list_lock);
+
+       data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
+       raw_spin_unlock(&ve->per_cgroot_list_lock);
+
+       if (!data)
+               return NULL;
+
+       str = rcu_dereference(data->release_agent_path);
+       if (str)
+               result = str->val;
+       return result;
+}
+
 struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
 {
        struct cgroup_subsys_state *css, *tmp;
@@ -646,7 +704,9 @@ static int ve_start_container(struct ve_struct *ve)
        if (err < 0)
                goto err_iterate;
 
-       cgroup_mark_ve_roots(ve);
+       err = cgroup_mark_ve_roots(ve);
+       if (err)
+               goto err_mark_ve;
 
        ve->is_running = 1;
 
@@ -656,6 +716,8 @@ static int ve_start_container(struct ve_struct *ve)
 
        return 0;
 
+err_mark_ve:
+       ve_hook_iterate_fini(VE_SS_CHAIN, ve);
 err_iterate:
        ve_workqueue_stop(ve);
 err_workqueue:
@@ -672,7 +734,12 @@ err_list:
 static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list)
 {
        struct per_cgroot_data *data, *saved;
+       struct cgroup_rcu_string *release_agent;
        list_for_each_entry_safe(data, saved, per_cgroot_list, list) {
+               release_agent = data->release_agent_path;
+               RCU_INIT_POINTER(data->release_agent_path, NULL);
+               if (release_agent)
+                       kfree_rcu(release_agent, rcu_head);
                list_del_init(&data->list);
                kfree(data);
        }
-- 
1.8.3.1

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to