From: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com>

Previous patches already switched the release agent calling mechanism
to run the binary inside the ve. Now we also need to make the binary
path per-ve, so we store the release agent path in ve_ra_data structures
on the per-ve list ve->ra_data_list.

Short history of the patch:

Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com>
https://jira.sw.ru/browse/PSBM-83887
(cherry-picked from vz7 commit f1199bd9589b7c0914343dcc72f49ddaa9b98496)
https://jira.sw.ru/browse/PSBM-108270
(cherry picked from vz8 commit 40381d9afaff5618fbe93428f125a7acc2bde56d)
+++
ve/cgroup: Add release_agent to each container root cgroup
(cherry picked from vz8 commit 00bd9c411dba17692408fcdd1769fa90b275313b)

vz9 changes:
- rework and clean up the patch description
- don't remove cgroup_kn_lock_live error checking
- switch to new ve_ra_data_set / ve_ra_data_get_path_locked
- add a BUG_ON for the case when cft->file_offset is set, as that would
  lead to a crash in cgroup_add_file(NULL,...)
- take cgroup_mutex, as it is needed for cgroup_add_file / cgroup_rm_file
  and for cgroup_is_dead consistency
- fixup hunks from 00bd9c411dba are merged in where needed, but without
  the excess cft_q_node related code
- if for some reason a ve sees the host root cgroup, don't allow writing
  release_agent there or showing it

Note: ve_ra_data_set allocates memory, so we do not want to call it
under rcu_read_lock; instead we take a reference with get_ve and drop
rcu_read_lock before calling it. A plain get_ve (not a tryget) is enough
because we have just seen ve_owner set, so the ve is not fully stopped
yet and should still be populated.

https://jira.sw.ru/browse/PSBM-134002

Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
---
 include/linux/cgroup-defs.h     |   3 -
 kernel/cgroup/cgroup-internal.h |   1 +
 kernel/cgroup/cgroup-v1.c       | 105 ++++++++++++++++++++++----------
 kernel/cgroup/cgroup.c          |  40 +++++++++++-
 4 files changed, 111 insertions(+), 38 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 2035909f7a0a..ef8ebd8a1363 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -539,9 +539,6 @@ struct cgroup_root {
        /* Hierarchy-specific flags */
        unsigned int flags;
 
-       /* The path to use for release notifications. */
-       char release_agent_path[PATH_MAX];
-
        /* The name for this hierarchy - may be empty */
        char name[MAX_CGROUP_ROOT_NAMELEN];
 };
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 81bf97510198..c23ea4aa8e19 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -267,6 +267,7 @@ extern const struct proc_ns_operations cgroupns_operations;
 extern struct cftype cgroup1_base_files[];
 extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
 extern const struct fs_parameter_spec cgroup1_fs_parameters[];
+struct cftype *get_cftype_by_name(const char *name);
 
 int proc_cgroupstats_show(struct seq_file *m, void *v);
 bool cgroup1_ssid_disabled(int ssid);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 5b3e8e79cc32..fe781bda5962 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -39,9 +39,6 @@ static bool cgroup_no_v1_named;
  */
 static struct workqueue_struct *cgroup_pidlist_destroy_wq;
 
-/* protects cgroup_subsys->release_agent_path */
-static DEFINE_SPINLOCK(release_agent_path_lock);
-
 bool cgroup1_ssid_disabled(int ssid)
 {
        return cgroup_no_v1_mask & (1 << ssid);
@@ -542,28 +539,62 @@ static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
 static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
                                          char *buf, size_t nbytes, loff_t off)
 {
+       struct cgroup *root_cgrp;
+       struct ve_struct *ve = NULL;
        struct cgroup *cgrp;
-
-       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+       int ret;
 
        cgrp = cgroup_kn_lock_live(of->kn, false);
        if (!cgrp)
                return -ENODEV;
-       spin_lock(&release_agent_path_lock);
-       strlcpy(cgrp->root->release_agent_path, strstrip(buf),
-               sizeof(cgrp->root->release_agent_path));
-       spin_unlock(&release_agent_path_lock);
+
+       rcu_read_lock();
+       root_cgrp = cgroup_ve_root1(cgrp);
+       if (!root_cgrp) {
+               if (ve_is_super(get_exec_env()) && cgrp == &cgrp->root->cgrp)
+                       ve = &ve0;
+       } else {
+               if (root_cgrp == cgrp)
+                       ve = rcu_dereference(root_cgrp->ve_owner);
+       }
+       get_ve(ve);
+       rcu_read_unlock();
+
+       if (ve) {
+               ret = ve_ra_data_set(ve, cgrp->root, strstrip(buf));
+       } else {
+               /* The ve is stopped or we have a bad cgrp */
+               ret = -ENODEV;
+       }
+
+       put_ve(ve);
        cgroup_kn_unlock(of->kn);
-       return nbytes;
+       return ret ? : nbytes;
 }
 
 static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 {
+       const char *release_agent = NULL;
+       struct cgroup *root_cgrp;
        struct cgroup *cgrp = seq_css(seq)->cgroup;
+       struct ve_struct *ve = NULL;
+
+       rcu_read_lock();
+       root_cgrp = cgroup_ve_root1(cgrp);
+       if (!root_cgrp) {
+               if (ve_is_super(get_exec_env()) && cgrp == &cgrp->root->cgrp)
+                       ve = &ve0;
+       } else {
+               if (root_cgrp == cgrp)
+                       ve = rcu_dereference(root_cgrp->ve_owner);
+       }
+
+       if (ve)
+               release_agent = ve_ra_data_get_path_locked(ve, cgrp->root);
+       if (release_agent)
+               seq_puts(seq, release_agent);
+       rcu_read_unlock();
 
-       spin_lock(&release_agent_path_lock);
-       seq_puts(seq, cgrp->root->release_agent_path);
-       spin_unlock(&release_agent_path_lock);
        seq_putc(seq, '\n');
        return 0;
 }
@@ -661,7 +692,7 @@ struct cftype cgroup1_base_files[] = {
        },
        {
                .name = "release_agent",
-               .flags = CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
                .seq_show = cgroup_release_agent_show,
                .write = cgroup_release_agent_write,
                .max_write_len = PATH_MAX - 1,
@@ -674,6 +705,17 @@ struct cftype cgroup1_base_files[] = {
        { }     /* terminate */
 };
 
+struct cftype *get_cftype_by_name(const char *name)
+{
+       struct cftype *cft;
+
+       for (cft = cgroup1_base_files; cft->name[0] != '\0'; cft++) {
+               if (!strcmp(cft->name, name))
+                       return cft;
+       }
+       return NULL;
+}
+
 #define _cg_virtualized(x) ((ve_is_super(get_exec_env())) ? (x) : 1)
 
 /* Display information about each subsystem and each hierarchy */
@@ -827,6 +869,7 @@ void cgroup1_release_agent(struct work_struct *work)
        spin_lock_irqsave(&ve->release_list_lock, flags);
        while (!list_empty(&ve->release_list)) {
                struct cgroup *cgrp;
+               const char *release_agent;
 
                cgrp = list_entry(ve->release_list.next,
                                  struct cgroup,
@@ -834,15 +877,14 @@ void cgroup1_release_agent(struct work_struct *work)
                list_del_init(&cgrp->release_list);
                spin_unlock_irqrestore(&ve->release_list_lock, flags);
 
-               /* snoop agent path and exit early if empty */
-               if (!cgrp->root->release_agent_path[0])
-                       goto continue_locked;
-
-               spin_lock(&release_agent_path_lock);
-               strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
-               spin_unlock(&release_agent_path_lock);
-               if (!agentbuf[0])
+               rcu_read_lock();
+               release_agent = ve_ra_data_get_path_locked(ve, cgrp->root);
+               if (!release_agent || !release_agent[0]) {
+                       rcu_read_unlock();
                        goto continue_locked;
+               }
+               strlcpy(agentbuf, release_agent, PATH_MAX);
+               rcu_read_unlock();
 
                ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, ve_cgroup_ns);
                if (ret < 0 || ret >= PATH_MAX)
@@ -914,6 +956,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
 
 static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root 
*kf_root)
 {
+       const char *release_agent;
        struct cgroup_root *root = cgroup_root_from_kf(kf_root);
        struct cgroup_subsys *ss;
        int ssid;
@@ -928,12 +971,11 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
        if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
                seq_puts(seq, ",cpuset_v2_mode");
 
-       spin_lock(&release_agent_path_lock);
-       if (strlen(root->release_agent_path))
-               seq_show_option(seq, "release_agent",
-                               root->release_agent_path);
-       spin_unlock(&release_agent_path_lock);
-
+       rcu_read_lock();
+       release_agent = ve_ra_data_get_path_locked(get_exec_env(), root);
+       if (release_agent && release_agent[0])
+               seq_show_option(seq, "release_agent", release_agent);
+       rcu_read_unlock();
        if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
                seq_puts(seq, ",clone_children");
        if (strlen(root->name))
@@ -1144,11 +1186,8 @@ int cgroup1_reconfigure(struct fs_context *fc)
 
        WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
 
-       if (ctx->release_agent) {
-               spin_lock(&release_agent_path_lock);
-               strcpy(root->release_agent_path, ctx->release_agent);
-               spin_unlock(&release_agent_path_lock);
-       }
+       if (ctx->release_agent)
+               ret = ve_ra_data_set(get_exec_env(), root, ctx->release_agent);
 
        trace_cgroup_remount(root);
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a1bbb9bdbe5c..546de0f90b53 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -68,6 +68,8 @@
 /* let's not notify more than 100 times per second */
 #define CGROUP_FILE_NOTIFY_MIN_INTV    DIV_ROUND_UP(HZ, 100)
 
+#define CGROUP_FILENAME_RELEASE_AGENT "release_agent"
+
 /*
  * cgroup_mutex is the master lock.  Any modification to cgroup or its
  * hierarchy must be performed while holding it.
@@ -2049,12 +2051,21 @@ static inline bool ve_check_root_cgroups(struct css_set *cset)
        return false;
 }
 
+static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup 
*cgrp,
+                          struct cftype *cft, bool activate);
+
 int cgroup_mark_ve_roots(struct ve_struct *ve)
 {
        struct cgrp_cset_link *link;
        struct css_set *cset;
        struct cgroup *cgrp;
+       struct cftype *cft;
+       int err = 0;
+
+       cft = get_cftype_by_name(CGROUP_FILENAME_RELEASE_AGENT);
+       BUG_ON(!cft || cft->file_offset);
 
+       mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
 
        /*
@@ -2067,6 +2078,7 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
 
        if (ve_check_root_cgroups(cset)) {
                spin_unlock_irq(&css_set_lock);
+               mutex_unlock(&cgroup_mutex);
                return -EINVAL;
        }
 
@@ -2078,12 +2090,30 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
 
                rcu_assign_pointer(cgrp->ve_owner, ve);
                set_bit(CGRP_VE_ROOT, &cgrp->flags);
+
+               /*
+                * The cgroupns can hold reference on cgroups which are already
+                * offline with already removed directory on the filesystem. We
+                * should not add files to them.
+                */
+               if (!cgroup_is_dead(cgrp)) {
+                       err = cgroup_add_file(NULL, cgrp, cft, true);
+                       if (err) {
+                               pr_warn("failed to add file to VE_ROOT cgroup,"
+                                       " err:%d\n", err);
+                               break;
+                       }
+               }
        }
 
        link_ve_root_cpu_cgroup(cset->subsys[cpu_cgrp_id]);
        spin_unlock_irq(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
        synchronize_rcu();
-       return 0;
+
+       if (err)
+               cgroup_unmark_ve_roots(ve);
+       return err;
 }
 
 void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -2091,7 +2121,11 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
        struct cgrp_cset_link *link;
        struct css_set *cset;
        struct cgroup *cgrp;
+       struct cftype *cft;
+
+       cft = get_cftype_by_name(CGROUP_FILENAME_RELEASE_AGENT);
 
+       mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
 
        /*
@@ -2108,11 +2142,13 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
                if (!is_virtualized_cgroup(cgrp))
                        continue;
 
+               cgroup_rm_file(cgrp, cft);
                rcu_assign_pointer(cgrp->ve_owner, NULL);
                clear_bit(CGRP_VE_ROOT, &cgrp->flags);
        }
 
        spin_unlock_irq(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
        /* ve_owner == NULL will be visible */
        synchronize_rcu();
 }
@@ -2195,7 +2231,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
 
        root->flags = ctx->flags;
        if (ctx->release_agent)
-               strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
+               ve_ra_data_set(get_exec_env(), root, ctx->release_agent);
        if (ctx->name)
                strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
        if (ctx->cpuset_clone_children)
-- 
2.31.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to