stop_two_cpus() and stop_cpus() use stop_cpus_lock to avoid the
deadlock, we need to ensure that the stopper functions can't be
queued "backwards" from one another.

Instead, we can change stop_two_cpus() to take 2 stopper->lock's
and queue both works "atomically"; just we need to check that both
->stop_work's on these CPU's are either free or already queued.

Note: this patch preserves the cpu_active() checks, but I think
we need to shift them into migrate_swap_stop(). However, we can't
do this without another cleanup: currently stopper->enabled does
not guarantee that work->fn() will be actually executed if we race
with cpu_down().

Signed-off-by: Oleg Nesterov <o...@redhat.com>
---
 include/linux/lglock.h  |    5 --
 kernel/locking/lglock.c |   22 ---------
 kernel/stop_machine.c   |  120 ++++++++++++++++++++++++++++-------------------
 3 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index c92ebd1..0081f00 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -52,15 +52,10 @@ struct lglock {
        static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
-
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index 951cfcd..86ae2ae 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -60,28 +60,6 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
 
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
-       BUG_ON(cpu1 == cpu2);
-
-       /* lock in cpu order, just like lg_global_lock */
-       if (cpu2 < cpu1)
-               swap(cpu1, cpu2);
-
-       preempt_disable();
-       lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-       arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
-       arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
-       lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
-       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
-       preempt_enable();
-}
-
 void lg_global_lock(struct lglock *lg)
 {
        int i;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 12484e5..20fb291 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -47,14 +46,6 @@ struct cpu_stopper {
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
-
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int 
nr_todo)
 {
        memset(done, 0, sizeof(*done));
@@ -73,21 +64,29 @@ static void cpu_stop_signal_done(struct cpu_stop_done 
*done, bool executed)
        }
 }
 
+static inline bool stop_work_pending(struct cpu_stopper *stopper)
+{
+       return !list_empty(&stopper->stop_work.list);
+}
+
+static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
+                                       struct cpu_stop_work *work)
+{
+       list_add_tail(&work->list, &stopper->works);
+       wake_up_process(stopper->thread);
+}
+
 /* queue @work to @stopper.  if offline, @work is completed immediately */
 static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-
        unsigned long flags;
 
        spin_lock_irqsave(&stopper->lock, flags);
-
-       if (stopper->enabled) {
-               list_add_tail(&work->list, &stopper->works);
-               wake_up_process(stopper->thread);
-       } else
+       if (stopper->enabled)
+               __cpu_stop_queue_work(stopper, work);
+       else
                cpu_stop_signal_done(work->done, false);
-
        spin_unlock_irqrestore(&stopper->lock, flags);
 }
 
@@ -213,6 +212,48 @@ static int multi_cpu_stop(void *data)
        return err;
 }
 
+static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
+                                   int cpu2, struct cpu_stop_work *work2)
+{
+       struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
+       struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+       int err;
+retry:
+       spin_lock_irq(&stopper1->lock);
+       spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+       /*
+        * If we observe both CPUs active we know _cpu_down() cannot yet have
+        * queued its stop_machine works and therefore ours will get executed
+        * first. Or its not either one of our CPUs that's getting unplugged,
+        * in which case we don't care.
+        */
+       err = -ENOENT;
+       if (!cpu_active(cpu1) || !cpu_active(cpu2))
+               goto unlock;
+
+       WARN_ON(!stopper1->enabled || !stopper2->enabled);
+       /*
+        * Ensure that if we race with stop_cpus() the stoppers won't
+        * get queued up in reverse order, leading to system deadlock.
+        */
+       err = -EDEADLK;
+       if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
+               goto unlock;
+
+       err = 0;
+       __cpu_stop_queue_work(stopper1, work1);
+       __cpu_stop_queue_work(stopper2, work2);
+unlock:
+       spin_unlock(&stopper2->lock);
+       spin_unlock_irq(&stopper1->lock);
+
+       if (unlikely(err == -EDEADLK)) {
+               cond_resched();
+               goto retry;
+       }
+       return err;
+}
+
 /**
  * stop_two_cpus - stops two cpus
  * @cpu1: the cpu to stop
@@ -228,48 +269,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, 
cpu_stop_fn_t fn, void *
 {
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
-       struct multi_stop_data msdata;
-
-       preempt_disable();
-       msdata = (struct multi_stop_data){
+       struct multi_stop_data msdata = {
                .fn = fn,
                .data = arg,
                .num_threads = 2,
                .active_cpus = cpumask_of(cpu1),
        };
 
-       work1 = work2 = (struct cpu_stop_work){
-               .fn = multi_cpu_stop,
-               .arg = &msdata,
-               .done = &done
-       };
-
-       cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);
+       cpu_stop_init_done(&done, 2);
 
-       /*
-        * If we observe both CPUs active we know _cpu_down() cannot yet have
-        * queued its stop_machine works and therefore ours will get executed
-        * first. Or its not either one of our CPUs that's getting unplugged,
-        * in which case we don't care.
-        *
-        * This relies on the stopper workqueues to be FIFO.
-        */
-       if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
-               preempt_enable();
-               return -ENOENT;
-       }
-
-       lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
-       cpu_stop_queue_work(cpu1, &work1);
-       cpu_stop_queue_work(cpu2, &work2);
-       lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
+       work1.fn   = work2.fn   = multi_cpu_stop;
+       work1.arg  = work2.arg  = &msdata;
+       work1.done = work2.done = &done;
 
-       preempt_enable();
+       if (cpu1 > cpu2)
+               swap(cpu1, cpu2);
+       if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
+               return -ENOENT;
 
        wait_for_completion(&done.completion);
-
-       return done.executed ? done.ret : -ENOENT;
+       WARN_ON(!done.executed);
+       return done.ret;
 }
 
 /**
@@ -308,7 +329,7 @@ static void queue_stop_cpus_work(const struct cpumask 
*cpumask,
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
-       lg_global_lock(&stop_cpus_lock);
+       preempt_disable();
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
@@ -316,7 +337,7 @@ static void queue_stop_cpus_work(const struct cpumask 
*cpumask,
                work->done = done;
                cpu_stop_queue_work(cpu, work);
        }
-       lg_global_unlock(&stop_cpus_lock);
+       preempt_enable();
 }
 
 static int __stop_cpus(const struct cpumask *cpumask,
@@ -505,6 +526,7 @@ static int __init cpu_stop_init(void)
 
                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
+               INIT_LIST_HEAD(&stopper->stop_work.list);
        }
 
        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
-- 
1.5.5.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to