Consider:

   sched_setaffinity(p, X);             sched_setaffinity(p, Y);

Then the first will install p->migration_pending = &my_pending; and
issue stop_one_cpu_nowait(pending); the second will then observe
p->migration_pending and _also_ issue stop_one_cpu_nowait(pending),
queueing the _SAME_ @pending a second time.

Queueing the same cpu_stop_work twice like this corrupts the stopper's
work list.
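
To illustrate the failure mode, here is a minimal userspace sketch
(an analogue, not kernel code): the list below stands in for the
per-CPU stopper work list that stop_one_cpu_nowait() appends the
cpu_stop_work to, and adding the same node twice leaves it
self-linked, so a traversal never gets back to the list head:

   #include <stdio.h>

   /* Kernel-style circular doubly-linked list. */
   struct list_head { struct list_head *next, *prev; };

   static void list_add_tail(struct list_head *new, struct list_head *head)
   {
           struct list_head *prev = head->prev;

           new->next  = head;
           new->prev  = prev;
           prev->next = new;
           head->prev = new;
   }

   int main(void)
   {
           struct list_head works = { &works, &works }; /* stopper->works */
           struct list_head work;                       /* ->stop_work    */
           struct list_head *pos;
           int n = 0;

           list_add_tail(&work, &works);  /* first stop_one_cpu_nowait() */
           list_add_tail(&work, &works);  /* second caller, _SAME_ node  */

           /*
            * The second add leaves work.next pointing at work itself,
            * so the loop below never reaches &works again; bound it so
            * the demo terminates.
            */
           for (pos = works.next; pos != &works && n < 5; pos = pos->next)
                   printf("visit %d: %p\n", n++, (void *)pos);

           if (pos != &works)
                   printf("corrupted: traversal did not terminate\n");
           return 0;
   }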

Add set_affinity_pending::stop_pending to indicate whether a stopper is
already in progress, and only queue a new one when it is not.
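
Concretely, the queueing side in affine_move_task() then reduces to
roughly the following (paraphrasing the diff below, SCA_MIGRATE_ENABLE
handling elided; the flag is set here and cleared in
migration_cpu_stop() both under the rq lock, which is what makes the
test-and-set race-free):

           stop_pending = pending->stop_pending;
           if (!stop_pending)
                   pending->stop_pending = true;   /* we will queue it */

           refcount_inc(&pending->refs);
           task_rq_unlock(rq, p, rf);

           /* Only the caller that set the flag queues the stopper work. */
           if (!stop_pending)
                   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
                                       &pending->arg, &pending->stop_work);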

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 kernel/sched/core.c |   15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1864,6 +1864,7 @@ struct migration_arg {
 
 struct set_affinity_pending {
        refcount_t              refs;
+       unsigned int            stop_pending;
        struct completion       done;
        struct cpu_stop_work    stop_work;
        struct migration_arg    arg;
@@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data
                 * determine is_migration_disabled() and so have to chase after
                 * it.
                 */
+               WARN_ON_ONCE(!pending->stop_pending);
                task_rq_unlock(rq, p, &rf);
                stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
                                    &pending->arg, &pending->stop_work);
                return 0;
        }
 out:
+       if (pending)
+               pending->stop_pending = false;
        task_rq_unlock(rq, p, &rf);
 
        if (complete)
@@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *r
                            int dest_cpu, unsigned int flags)
 {
        struct set_affinity_pending my_pending = { }, *pending = NULL;
-       bool complete = false;
+       bool stop_pending, complete = false;
 
        /* Can the task run on the task's current CPU? If so, we're done */
        if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
@@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *r
                 * anything else we cannot do is_migration_disabled(), punt
                 * and have the stopper function handle it all race-free.
                 */
+               stop_pending = pending->stop_pending;
+               if (!stop_pending)
+                       pending->stop_pending = true;
 
                refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
                if (flags & SCA_MIGRATE_ENABLE)
                        p->migration_flags &= ~MDF_PUSH;
                task_rq_unlock(rq, p, rf);
 
-               stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-                                   &pending->arg, &pending->stop_work);
+               if (!stop_pending) {
+                       stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+                                           &pending->arg, &pending->stop_work);
+               }
 
                if (flags & SCA_MIGRATE_ENABLE)
                        return 0;

