This allows us to unbind a cgroup subsystem from a hierarchy
which has sub-cgroups in it.

If a subsystem is to support unbinding, when pinning a cgroup
via css refcnt, it should use __css_tryget() instead of css_get().

Usage:

 # mount -t cgroup -o cpuset,cpuacct xxx /mnt
 # mkdir /mnt/tmp
 # echo $$ > /mnt/tmp/tasks

 (remove cpuacct from the hierarchy)
 # mount -o remount,cpuset xxx /mnt

Changelog v2:

- Allow a cgroup subsystem to use css refcnt.
- Add more code comments.
- Use rcu_assign_pointer() in hierarchy_update_css_sets().
- Split can_bind flag to bindable and unbindable flags.

Signed-off-by: Li Zefan <l...@cn.fujitsu.com>
---
 include/linux/cgroup.h |   17 ++++++
 kernel/cgroup.c        |  139 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 138 insertions(+), 18 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d8c4e22..17579b2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -110,6 +110,18 @@ static inline bool css_is_removed(struct 
cgroup_subsys_state *css)
 }
 
 /*
+ * For a subsystem which supports unbinding, call this to get css
+ * refcnt. Called with rcu_read_lock or cgroup_mutex held.
+ */
+
+static inline bool __css_tryget(struct cgroup_subsys_state *css)
+{
+       if (test_bit(CSS_ROOT, &css->flags))
+               return true; /* CSS_ROOT css: succeed without touching refcnt */
+       return atomic_inc_not_zero(&css->refcnt); /* false if refcnt is 0 */
+}
+
+/*
  * Call css_tryget() to take a reference on a css if your existing
  * (known-valid) reference isn't already ref-counted. Returns false if
  * the css has been destroyed.
@@ -495,6 +507,11 @@ struct cgroup_subsys {
         * which has child cgroups.
         */
        bool bindable:1;
+       /*
+        * Indicate if this subsystem can be removed from a cgroup hierarchy
+        * which has child cgroups.
+        */
+       bool unbindable:1;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
        const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index caac80f..463575d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1055,12 +1055,61 @@ static int hierarchy_attach_css(struct cgroup *cgrp, 
void *data)
 }
 
 /*
- * After attaching new css objects to the cgroup, we need to entangle
- * them into the existing css_sets.
+ * Reset those css objects whose refcnts are cleared.
  */
-static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
+static int hierarchy_reset_css_refs(struct cgroup *cgrp, void *data)
+{
+       unsigned long removed_bits = (unsigned long)data; /* subsys bitmask */
+       int i;
+
+       for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+               if (atomic_read(&cgrp->subsys[i]->refcnt) == 0) /* was cleared */
+                       atomic_set(&cgrp->subsys[i]->refcnt, 1); /* restore */
+       }
+       return 0; /* never fails */
+}
+
+/*
+ * Clear all the css objects' refcnt to 0. If there's a refcnt > 1,
+ * return failure.
+ */
+static int hierarchy_clear_css_refs(struct cgroup *cgrp, void *data)
+{
+       unsigned long removed_bits = (unsigned long)data; /* subsys bitmask */
+       int i;
+
+       for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+               struct cgroup_subsys_state *css = cgrp->subsys[i];
+
+               if (atomic_cmpxchg(&css->refcnt, 1, 0) != 1) /* 1 -> 0 or fail */
+                       goto failed;
+       }
+       return 0;
+failed:
+       hierarchy_reset_css_refs(cgrp, data); /* undo clears on this cgroup */
+       return -EBUSY;
+}
+
+/*
+ * We're removing some subsystems from cgroup hierarchy, and here we
+ * remove and destroy the css objects from each cgroup.
+ */
+static int hierarchy_remove_css(struct cgroup *cgrp, void *data)
+{
+       unsigned long removed_bits = (unsigned long)data; /* subsys bitmask */
+       int i;
+
+       for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+               subsys[i]->destroy(subsys[i], cgrp); /* subsystem's own teardown */
+               cgrp->subsys[i] = NULL; /* clear this cgroup's css slot */
+       }
+
+       return 0; /* always succeeds */
+}
+
+static int hierarchy_update_css_sets(struct cgroup *cgrp,
+                                    unsigned long bits, bool add)
 {
-       unsigned long added_bits = (unsigned long)data;
        int i;
        struct cg_cgroup_link *link;
 
@@ -1069,8 +1118,14 @@ static int hierarchy_update_css_sets(struct cgroup 
*cgrp, void *data)
                struct css_set *cg = link->cg;
                struct hlist_head *hhead;
 
-               for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
-                       rcu_assign_pointer(cg->subsys[i], cgrp->subsys[i]);
+               for_each_set_bit(i, &bits, CGROUP_SUBSYS_COUNT) {
+                       if (add)
+                               rcu_assign_pointer(cg->subsys[i],
+                                                  cgrp->subsys[i]);
+                       else
+                               rcu_assign_pointer(cg->subsys[i],
+                                                  dummytop->subsys[i]);
+               }
 
                /* rehash */
                hlist_del(&cg->hlist);
@@ -1083,6 +1138,30 @@ static int hierarchy_update_css_sets(struct cgroup 
*cgrp, void *data)
 }
 
 /*
+ * After attaching new css objects to the cgroup, we need to entangle
+ * them into the existing css_sets.
+ */
+static int hierarchy_add_to_css_sets(struct cgroup *cgrp, void *data)
+{
+       unsigned long added_bits = (unsigned long)data; /* subsys bitmask */
+
+       hierarchy_update_css_sets(cgrp, added_bits, true); /* add == true */
+       return 0; /* result of the update is not propagated */
+}
+
+/*
+ * Before detaching and destroying css objects from the cgroup, we
+ * should detangle them from the existing css_sets.
+ */
+static int hierarchy_remove_from_css_sets(struct cgroup *cgrp, void *data)
+{
+       unsigned long removed_bits = (unsigned long)data; /* subsys bitmask */
+
+       hierarchy_update_css_sets(cgrp, removed_bits, false); /* add == false */
+       return 0; /* result of the update is not propagated */
+}
+
+/*
  * Re-populate each cgroup directory.
  *
  * Note root cgroup's inode mutex is held.
@@ -1127,18 +1206,17 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                }
        }
 
-       /* Removing will be supported later */
-       if (root->number_of_cgroups > 1 && removed_bits)
-               return -EBUSY;
-
        /*
         * For non-trivial hierarchy, check that added subsystems
-        * are all bindable
+        * are all bindable and removed subsystems are all unbindable
         */
        if (root->number_of_cgroups > 1) {
                for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
                        if (!subsys[i]->bindable)
                                return -EBUSY;
+               for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT)
+                       if (!subsys[i]->unbindable)
+                               return -EBUSY;
        }
 
        /* Attach css objects to the top cgroup */
@@ -1154,9 +1232,14 @@ static int rebind_subsystems(struct cgroupfs_root *root,
        err = cgroup_walk_hierarchy(hierarchy_attach_css,
                                    (void *)added_bits, cgrp);
        if (err)
-               goto failed;
+               goto out;
+
+       err = cgroup_walk_hierarchy(hierarchy_clear_css_refs,
+                                   (void *)removed_bits, cgrp);
+       if (err)
+               goto out_remove_css;
 
-       cgroup_walk_hierarchy(hierarchy_update_css_sets,
+       cgroup_walk_hierarchy(hierarchy_add_to_css_sets,
                              (void *)added_bits, cgrp);
 
        /* Process each subsystem */
@@ -1176,11 +1259,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                } else if (bit & removed_bits) {
                        /* We're removing this subsystem */
                        BUG_ON(ss == NULL);
-                       BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
-                       BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
                        mutex_lock(&ss->hierarchy_mutex);
-                       dummytop->subsys[i]->cgroup = dummytop;
-                       cgrp->subsys[i] = NULL;
                        if (ss->bind)
                                ss->bind(ss, dummytop);
                        subsys[i]->root = &rootnode;
@@ -1206,11 +1285,35 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                }
        }
        root->subsys_bits = root->actual_subsys_bits = final_bits;
+
+       for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+               BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+               BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+
+               dummytop->subsys[i]->cgroup = dummytop;
+               cgrp->subsys[i] = NULL;
+       }
+
+       cgroup_walk_hierarchy(hierarchy_remove_from_css_sets,
+                             (void *)removed_bits, cgrp);
+
+       /*
+        * There might be some pointers to the cgroup_subsys_state
+        * that we are going to destroy.
+        */
+       synchronize_rcu();
+
+       cgroup_walk_hierarchy(hierarchy_remove_css,
+                             (void *)removed_bits, cgrp);
+
        synchronize_rcu();
 
        return 0;
 
-failed:
+out_remove_css:
+       cgroup_walk_hierarchy(hierarchy_remove_css,
+                             (void *)added_bits, cgrp);
+out:
        for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
                cgrp->subsys[i] = NULL;
 
-- 
1.6.3

_______________________________________________
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to