Making a cgroup a domain root will reserve cpu resource at its parent.
So when a domain root cgroup is destroyed, we need to free the
reserved cpus at its parent. This is now handled by automatically
turning off the sched.domain_root flag in the offlining phase when a
domain root cgroup is being removed.

Signed-off-by: Waiman Long <long...@redhat.com>
---
 kernel/cgroup/cpuset.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 68a9c25..a1d5ccd 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -995,7 +995,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct 
cpumask *new_cpus)
  * If the sched_domain_root flag changes, either the delmask (0=>1) or the
  * addmask (1=>0) will be NULL.
  *
- * Called with cpuset_mutex held.
+ * Called with cpuset_mutex held. Some of the checks are skipped if the
+ * cpuset is being offlined (dying).
  */
 static int update_reserved_cpumask(struct cpuset *cpuset,
        struct cpumask *delmask, struct cpumask *addmask)
@@ -1005,6 +1006,7 @@ static int update_reserved_cpumask(struct cpuset *cpuset,
        struct cpuset *sibling;
        struct cgroup_subsys_state *pos_css;
        int old_count = parent->nr_reserved;
+       bool dying = cpuset->css.flags & CSS_DYING;
 
        /*
         * The parent must be a scheduling domain root.
@@ -1026,9 +1028,9 @@ static int update_reserved_cpumask(struct cpuset *cpuset,
 
        /*
         * A sched_domain_root state change is not allowed if there are
-        * online children.
+        * online children and the cpuset is not dying.
         */
-       if (css_has_online_children(&cpuset->css))
+       if (!dying && css_has_online_children(&cpuset->css))
                return -EBUSY;
 
        if (!old_count) {
@@ -1058,7 +1060,12 @@ static int update_reserved_cpumask(struct cpuset *cpuset,
         * Check if any CPUs in addmask or delmask are in the effective_cpus
         * of a sibling cpuset. The implied cpu_exclusive of a scheduling
         * domain root will ensure there are no overlap in cpus_allowed.
+        *
+        * This check is skipped if the cpuset is dying.
         */
+       if (dying)
+               goto updated_reserved_cpus;
+
        rcu_read_lock();
        cpuset_for_each_child(sibling, pos_css, parent) {
                if ((sibling == cpuset) || !(sibling->css.flags & CSS_ONLINE))
@@ -1077,6 +1084,7 @@ static int update_reserved_cpumask(struct cpuset *cpuset,
         * Newly added reserved CPUs will be removed from effective_cpus
         * and newly deleted ones will be added back if they are online.
         */
+updated_reserved_cpus:
        spin_lock_irq(&callback_lock);
        if (addmask) {
                cpumask_or(parent->reserved_cpus,
@@ -2278,7 +2286,12 @@ static int cpuset_css_online(struct cgroup_subsys_state 
*css)
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
+ * will call rebuild_sched_domains_locked(). That is not needed
+ * in the default hierarchy where only changes in domain_root
+ * will cause repartitioning.
+ *
+ * If the cpuset has the 'sched.domain_root' flag enabled, simulate
+ * turning 'sched.domain_root' off.
  */
 
 static void cpuset_css_offline(struct cgroup_subsys_state *css)
@@ -2287,7 +2300,18 @@ static void cpuset_css_offline(struct 
cgroup_subsys_state *css)
 
        mutex_lock(&cpuset_mutex);
 
-       if (is_sched_load_balance(cs))
+       /*
+        * A WARN_ON_ONCE() check after calling update_flag() to make
+        * sure that the operation succeeds.
+        */
+       if (is_sched_domain_root(cs)) {
+               int ret = update_flag(CS_SCHED_DOMAIN_ROOT, cs, 0);
+
+               WARN_ON_ONCE(ret);
+       }
+
+       if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+           is_sched_load_balance(cs))
                update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
        cpuset_dec();
-- 
1.8.3.1

Reply via email to