callback_lock grants the holder read-only access to cpusets.  To fix
a synchronization issue between cpusets and the scheduler core,
callback_lock now needs to be available to core scheduler code.

Convert callback_lock to a raw_spin_lock, so that it is always safe
to acquire from atomic context.

Unfortunately, callback_lock guards some user-controlled operations
(e.g., cpuset_cpus_allowed, cpuset_mems_allowed, etc.), which is usually
undesirable for a raw_spin_lock. Ideally we would avoid this, but the
behavior is spread all over the place; it is a price we accept for the
time being.
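
For illustration only, a minimal sketch of the resulting locking
pattern (example_lock and example_atomic_reader are hypothetical names,
not part of this patch): on PREEMPT_RT a spinlock_t is backed by an
rtmutex and may sleep, whereas a raw_spinlock_t always spins with
preemption disabled, so it stays safe to take from atomic context such
as core scheduler code.

  #include <linux/spinlock.h>

  static DEFINE_RAW_SPINLOCK(example_lock);

  /* Read-side access from atomic context, safe also on PREEMPT_RT. */
  static void example_atomic_reader(void)
  {
  	unsigned long flags;

  	raw_spin_lock_irqsave(&example_lock, flags);
  	/* ... read-only access to the protected state goes here ... */
  	raw_spin_unlock_irqrestore(&example_lock, flags);
  }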

Signed-off-by: Juri Lelli <[email protected]>

---

v6->v7: Added comment in changelog about callback_lock potential
problems w.r.t. userspace ops. [peterz]
---
 kernel/cgroup/cpuset.c | 70 +++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4834c4214e9c..ff9bd5abe613 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -333,7 +333,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_SPINLOCK(callback_lock);
+static DEFINE_RAW_SPINLOCK(callback_lock);
 
 static struct workqueue_struct *cpuset_migrate_mm_wq;
 
@@ -1235,7 +1235,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
         * Newly added CPUs will be removed from effective_cpus and
         * newly deleted ones will be added back to effective_cpus.
         */
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        if (adding) {
                cpumask_or(parent->subparts_cpus,
                           parent->subparts_cpus, tmp->addmask);
@@ -1254,7 +1254,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
        }
 
        parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        return cmd == partcmd_update;
 }
@@ -1359,7 +1359,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                        continue;
                rcu_read_unlock();
 
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
 
                cpumask_copy(cp->effective_cpus, tmp->new_cpus);
                if (cp->nr_subparts_cpus &&
@@ -1390,7 +1390,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                        = cpumask_weight(cp->subparts_cpus);
                        }
                }
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
 
                WARN_ON(!is_in_v2_mode() &&
                        !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -1508,7 +1508,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
                        return -EINVAL;
        }
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 
        /*
@@ -1519,7 +1519,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
                               cs->cpus_allowed);
                cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
        }
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        update_cpumasks_hier(cs, &tmp);
 
@@ -1713,9 +1713,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
                        continue;
                rcu_read_unlock();
 
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                cp->effective_mems = *new_mems;
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
 
                WARN_ON(!is_in_v2_mode() &&
                        !nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1783,9 +1783,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
        if (retval < 0)
                goto done;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->mems_allowed = trialcs->mems_allowed;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        /* use trialcs->mems_allowed as a temp variable */
        update_nodemasks_hier(cs, &trialcs->mems_allowed);
@@ -1876,9 +1876,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
                        || (is_spread_page(cs) != is_spread_page(trialcs)));
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->flags = trialcs->flags;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
                rebuild_sched_domains_locked();
@@ -2381,7 +2381,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
        cpuset_filetype_t type = seq_cft(sf)->private;
        int ret = 0;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
 
        switch (type) {
        case FILE_CPULIST:
@@ -2403,7 +2403,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
                ret = -EINVAL;
        }
 
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
        return ret;
 }
 
@@ -2713,14 +2713,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
        cpuset_inc();
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        if (is_in_v2_mode()) {
                cpumask_copy(cs->effective_cpus, parent->effective_cpus);
                cs->effective_mems = parent->effective_mems;
                cs->use_parent_ecpus = true;
                parent->child_ecpus_count++;
        }
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
                goto out_unlock;
@@ -2747,12 +2747,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
        }
        rcu_read_unlock();
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->mems_allowed = parent->mems_allowed;
        cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
        cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 out_unlock:
        mutex_unlock(&cpuset_mutex);
        return 0;
@@ -2805,7 +2805,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
        mutex_lock(&cpuset_mutex);
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
 
        if (is_in_v2_mode()) {
                cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2816,7 +2816,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
                top_cpuset.mems_allowed = top_cpuset.effective_mems;
        }
 
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
        mutex_unlock(&cpuset_mutex);
 }
 
@@ -2917,12 +2917,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
        bool is_empty;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, new_cpus);
        cpumask_copy(cs->effective_cpus, new_cpus);
        cs->mems_allowed = *new_mems;
        cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        /*
         * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2959,10 +2959,10 @@ hotplug_update_tasks(struct cpuset *cs,
        if (nodes_empty(*new_mems))
                *new_mems = parent_cs(cs)->effective_mems;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->effective_cpus, new_cpus);
        cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (cpus_updated)
                update_tasks_cpumask(cs);
@@ -3117,7 +3117,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
        /* synchronize cpus_allowed to cpu_active_mask */
        if (cpus_updated) {
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                if (!on_dfl)
                        cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
                /*
@@ -3137,17 +3137,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
                        }
                }
                cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
                /* we don't mess with cpumasks of tasks in top_cpuset */
        }
 
        /* synchronize mems_allowed to N_MEMORY */
        if (mems_updated) {
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                if (!on_dfl)
                        top_cpuset.mems_allowed = new_mems;
                top_cpuset.effective_mems = new_mems;
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
                update_tasks_nodemask(&top_cpuset);
        }
 
@@ -3248,11 +3248,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
        rcu_read_lock();
        guarantee_online_cpus(task_cs(tsk), pmask);
        rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -3300,11 +3300,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
        nodemask_t mask;
        unsigned long flags;
 
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
        rcu_read_lock();
        guarantee_online_mems(task_cs(tsk), &mask);
        rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
 
        return mask;
 }
@@ -3396,14 +3396,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
                return true;
 
        /* Not hardwall and node outside mems_allowed: scan up cpusets */
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
 
        rcu_read_lock();
        cs = nearest_hardwall_ancestor(task_cs(current));
        allowed = node_isset(node, cs->mems_allowed);
        rcu_read_unlock();
 
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
        return allowed;
 }
 
-- 
2.17.2
