The cpu hotplug lock is an rwsem with read-in-write and read-in-read
recursion. Implement it as such, on top of a percpu rwsem: the
outermost get_online_cpus() takes cpu_hotplug.rwsem for read, nested
readers only increment the per-task counter task_struct::cpuhp_ref,
and a reader that is itself the active hotplug writer
(cpu_hotplug.writer == current) short-circuits entirely.
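
A minimal sketch of the resulting lock-nesting semantics (an
illustration only, not code from the tree):

        /* read-in-read: only the outermost pair touches the rwsem */
        get_online_cpus();      /* cpuhp_ref 0->1, percpu_down_read() */
        get_online_cpus();      /* cpuhp_ref 1->2, no lock operation  */
        put_online_cpus();      /* cpuhp_ref 2->1                     */
        put_online_cpus();      /* cpuhp_ref 1->0, percpu_up_read()   */

        /* read-in-write: the active writer may nest readers freely */
        cpu_hotplug_begin();    /* percpu_down_write(), writer = current */
        get_online_cpus();      /* writer == current, returns immediately */
        put_online_cpus();      /* writer == current, returns immediately */
        cpu_hotplug_done();     /* writer = NULL, percpu_up_write()       */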

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
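Notes:

 - cpu_hotplug.writer is statically initialized to &init_task, so
   get_online_cpus()/put_online_cpus() calls during early boot, before
   cpu_hotplug_init() runs, take the read-in-write short-circuit
   instead of touching the not-yet-initialized percpu rwsem;
   cpu_hotplug_init() clears the field once the rwsem is usable.

 - percpu_down_read() is split into its lockdep annotation plus a
   _percpu_down_read() worker, so that get_online_cpus() can apply the
   recursive-read annotation (lock_map_acquire_read()) instead of the
   non-recursive rwsem_acquire_read() while sharing the same locking
   path. Roughly, per include/linux/lockdep.h:

        #define lock_map_acquire_read(l) \
                lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
        #define rwsem_acquire_read(l, s, t, i) \
                lock_acquire_shared(l, s, t, NULL, i)
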
 include/linux/cpu.h          |    6 +
 include/linux/percpu-rwsem.h |   10 ++-
 include/linux/sched.h        |    4 +
 init/main.c                  |    1 
 kernel/cpu.c                 |  138 ++++++++++++++------------------------------
 kernel/fork.c                |    2 
 lib/Kconfig                  |    5 +
 7 files changed, 71 insertions(+), 95 deletions(-)

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -224,6 +224,9 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init(void);
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +245,9 @@ int cpu_down(unsigned int cpu);
 
 #else          /* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init(void) {}
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()      do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -20,12 +20,10 @@ extern void __percpu_down_read(struct pe
 extern bool __percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void _percpu_down_read(struct percpu_rw_semaphore *sem)
 {
        might_sleep();
 
-       rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
-
        preempt_disable();
        /*
         * We are in an RCU-sched read-side critical section, so the writer
@@ -46,6 +44,12 @@ static inline void percpu_down_read(stru
         */
 }
 
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+       rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+       _percpu_down_read(sem);
+}
+
 static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
        bool ret = true;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,10 @@ struct task_struct {
        unsigned int btrace_seq;
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+       int cpuhp_ref;
+#endif
+
        unsigned int policy;
        int nr_cpus_allowed;
        cpumask_t cpus_allowed;
--- a/init/main.c
+++ b/init/main.c
@@ -588,6 +588,7 @@ asmlinkage __visible void __init start_k
        sched_clock_postinit();
        perf_event_init();
        profile_init();
+       cpu_hotplug_init();
        call_function_init();
        WARN(!irqs_disabled(), "Interrupts were enabled early\n");
        early_boot_irqs_disabled = false;
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -22,6 +22,7 @@
 #include <linux/lockdep.h>
 #include <linux/tick.h>
 #include <trace/events/power.h>
+#include <linux/percpu-rwsem.h>
 
 #include "smpboot.h"
 
@@ -50,7 +51,8 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
@@ -58,126 +60,77 @@ static int cpu_hotplug_disabled;
 #ifdef CONFIG_HOTPLUG_CPU
 
 static struct {
-       struct task_struct *active_writer;
-       /* wait queue to wake up the active_writer */
-       wait_queue_head_t wq;
-       /* verifies that no writer will get active while readers are active */
-       struct mutex lock;
-       /*
-        * Also blocks the new readers during
-        * an ongoing cpu hotplug operation.
-        */
-       atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-       .active_writer = NULL,
-       .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-       .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       .dep_map = {.name = "cpu_hotplug.lock" },
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-                                 lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
+       struct percpu_rw_semaphore      rwsem;
+       struct task_struct              *writer;
+} cpu_hotplug = { .writer = &init_task, };
+
+void cpu_hotplug_init(void)
+{
+       percpu_init_rwsem(&cpu_hotplug.rwsem);
+       cpu_hotplug.writer = NULL;
+}
 
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+       p->cpuhp_ref = 0;
+}
 
 void get_online_cpus(void)
 {
        might_sleep();
-       if (cpu_hotplug.active_writer == current)
+
+       /* read in write recursion */
+       if (cpu_hotplug.writer == current)
+               return;
+
+       /* read in read recursion */
+       if (current->cpuhp_ref++)
                return;
-       cpuhp_lock_acquire_read();
-       mutex_lock(&cpu_hotplug.lock);
-       atomic_inc(&cpu_hotplug.refcount);
-       mutex_unlock(&cpu_hotplug.lock);
+
+       lock_map_acquire_read(&cpu_hotplug.rwsem.rw_sem.dep_map);
+       _percpu_down_read(&cpu_hotplug.rwsem);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 bool try_get_online_cpus(void)
 {
-       if (cpu_hotplug.active_writer == current)
+       if (cpu_hotplug.writer == current)
                return true;
-       if (!mutex_trylock(&cpu_hotplug.lock))
-               return false;
-       cpuhp_lock_acquire_tryread();
-       atomic_inc(&cpu_hotplug.refcount);
-       mutex_unlock(&cpu_hotplug.lock);
-       return true;
+
+       if (current->cpuhp_ref++)
+               return true;
+
+       if (!percpu_down_read_trylock(&cpu_hotplug.rwsem)) {
+               /* undo the recursion count taken above */
+               current->cpuhp_ref--;
+               return false;
+       }
+       return true;
 }
 EXPORT_SYMBOL_GPL(try_get_online_cpus);
 
 void put_online_cpus(void)
 {
-       int refcount;
-
-       if (cpu_hotplug.active_writer == current)
+       if (cpu_hotplug.writer == current)
                return;
 
-       refcount = atomic_dec_return(&cpu_hotplug.refcount);
-       if (WARN_ON(refcount < 0)) /* try to fix things up */
-               atomic_inc(&cpu_hotplug.refcount);
-
-       if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-               wake_up(&cpu_hotplug.wq);
-
-       cpuhp_lock_release();
+       if (--current->cpuhp_ref)
+               return;
 
+       percpu_up_read(&cpu_hotplug.rwsem);
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpu_hotplug_begin(void)
 {
-       DEFINE_WAIT(wait);
-
-       cpu_hotplug.active_writer = current;
-       cpuhp_lock_acquire();
-
-       for (;;) {
-               mutex_lock(&cpu_hotplug.lock);
-               prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-               if (likely(!atomic_read(&cpu_hotplug.refcount)))
-                               break;
-               mutex_unlock(&cpu_hotplug.lock);
-               schedule();
-       }
-       finish_wait(&cpu_hotplug.wq, &wait);
+       percpu_down_write(&cpu_hotplug.rwsem);
+       cpu_hotplug.writer = current;
 }
 
 void cpu_hotplug_done(void)
 {
-       cpu_hotplug.active_writer = NULL;
-       mutex_unlock(&cpu_hotplug.lock);
-       cpuhp_lock_release();
+       cpu_hotplug.writer = NULL;
+       percpu_up_write(&cpu_hotplug.rwsem);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1410,6 +1410,8 @@ static struct task_struct *copy_process(
        p->sequential_io_avg    = 0;
 #endif
 
+       cpu_hotplug_init_task(p);
+
        /* Perform scheduler related setup. Assign this task to a CPU. */
        retval = sched_fork(clone_flags, p);
        if (retval)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -56,6 +56,11 @@ config STMP_DEVICE
 config PERCPU_RWSEM
        bool
 
+config PERCPU_RWSEM_HOTPLUG
+       def_bool y
+       depends on HOTPLUG_CPU
+       select PERCPU_RWSEM
+
 config ARCH_USE_CMPXCHG_LOCKREF
        bool
 

