Linus,

please pull the latest smp-hotplug-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git smp-hotplug-for-linus

This is the first part of the ongoing cpu hotplug rework:

  - Initial implementation of the state machine

  - Runs all online and prepare down callbacks on the plugged cpu and not on
    some random processor

  - Replaces busy loop waiting with completions

  - Adds tracepoints so the states can be followed

Further information is here:

 http://lkml.kernel.org/r/20160226164321.657646...@linutronix.de

Thanks,

        tglx

------------------>
Thomas Gleixner (23):
      cpu/hotplug: Restructure FROZEN state handling
      cpu/hotplug: Restructure cpu_up code
      cpu/hotplug: Split out cpu down functions
      cpu/hotplug: Add tracepoints
      cpu/hotplug: Convert to a state machine for the control processor
      cpu/hotplug: Convert the hotplugged cpu work to a state machine
      cpu/hotplug: Hand in target state to _cpu_up/down
      cpu/hotplug: Add sysfs state interface
      cpu/hotplug: Make target state writeable
      cpu/hotplug: Implement setup/removal interface
      cpu/hotplug: Move scheduler cpu_online notifier to hotplug core
      cpu/hotplug: Unpark smpboot threads from the state machine
      cpu/hotplug: Split out the state walk into functions
      cpu/hotplug: Create hotplug threads
      cpu/hotplug: Move online calls to hotplugged cpu
      arch/hotplug: Call into idle with a proper state
      cpu/hotplug: Let upcoming cpu bring itself fully up
      cpu/hotplug: Make wait for dead cpu completion based
      rcu: Make CPU_DYING_IDLE an explicit call
      cpu/hotplug: Plug death reporting race
      cpu/hotplug: Remove redundant state check
      cpu/hotplug: Fix smpboot thread ordering
      cpu/hotplug: Document states better


 arch/alpha/kernel/smp.c         |    2 +-
 arch/arc/kernel/smp.c           |    2 +-
 arch/arm/kernel/smp.c           |    2 +-
 arch/arm64/kernel/smp.c         |    2 +-
 arch/blackfin/mach-common/smp.c |    2 +-
 arch/hexagon/kernel/smp.c       |    2 +-
 arch/ia64/kernel/smpboot.c      |    2 +-
 arch/m32r/kernel/smpboot.c      |    2 +-
 arch/metag/kernel/smp.c         |    2 +-
 arch/mips/kernel/smp.c          |    2 +-
 arch/mn10300/kernel/smp.c       |    2 +-
 arch/parisc/kernel/smp.c        |    2 +-
 arch/powerpc/kernel/smp.c       |    2 +-
 arch/s390/kernel/smp.c          |    2 +-
 arch/sh/kernel/smp.c            |    2 +-
 arch/sparc/kernel/smp_32.c      |    2 +-
 arch/sparc/kernel/smp_64.c      |    2 +-
 arch/tile/kernel/smpboot.c      |    2 +-
 arch/x86/kernel/smpboot.c       |    2 +-
 arch/x86/xen/smp.c              |    2 +-
 arch/xtensa/kernel/smp.c        |    2 +-
 include/linux/cpu.h             |   27 +-
 include/linux/cpuhotplug.h      |   93 ++++
 include/linux/notifier.h        |    2 +
 include/linux/rcupdate.h        |    4 +-
 include/trace/events/cpuhp.h    |   66 +++
 init/main.c                     |   16 +-
 kernel/cpu.c                    | 1162 ++++++++++++++++++++++++++++++++++-----
 kernel/rcu/tree.c               |   70 +--
 kernel/sched/core.c             |   10 -
 kernel/sched/idle.c             |    9 +-
 kernel/smp.c                    |    1 +
 kernel/smpboot.c                |    6 +-
 kernel/smpboot.h                |    6 +-
 lib/Kconfig.debug               |   13 +
 35 files changed, 1291 insertions(+), 236 deletions(-)
 create mode 100644 include/linux/cpuhotplug.h
 create mode 100644 include/trace/events/cpuhp.h

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447fef92..46bf263c3153 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -168,7 +168,7 @@ smp_callin(void)
              cpuid, current, current->active_mm));
 
        preempt_disable();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /* Wait until hwrpb->txrdy is clear for cpu.  Return -1 on timeout.  */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 424e937da5c8..4cb3add77c75 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)
 
        local_irq_enable();
        preempt_disable();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6749f3..baee70267f29 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
        /*
         * OK, it's off to the idle thread for us
         */
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..460765799c64 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
        /*
         * OK, it's off to the idle thread for us
         */
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21cfceb..23c4ef5f8bdc 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)
 
        /* We are done with local CPU inits, unblock the boot CPU. */
        set_cpu_online(cpu, true);
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_prepare_boot_cpu(void)
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f26b96a..983bae7d2665 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,7 +180,7 @@ void start_secondary(void)
 
        local_irq_enable();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad27975..74fe317477e6 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -454,7 +454,7 @@ start_secondary (void *unused)
        preempt_disable();
        smp_callin();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
        return 0;
 }
 
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467542f4..f98d2f6519d6 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
         */
        local_flush_tlb_all();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
        return 0;
 }
 
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f0864881..bad13232de51 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
        /*
         * OK, it's off to the idle thread for us
         */
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a8e6e8..f2112a8ddf15 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -191,7 +191,7 @@ asmlinkage void start_secondary(void)
        WARN_ON_ONCE(!irqs_disabled());
        mp_ops->smp_finish();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void stop_this_cpu(void *dummy)
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193718b1..426173c4b0b9 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
        init_clockevents();
 #endif
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
        return 0;
 }
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e85973a283..c2a9cc55a62f 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -305,7 +305,7 @@ void __init smp_callin(void)
 
        local_irq_enable();  /* Interrupts have been off until now */
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
        /* NOTREACHED */
        panic("smp_callin() AAAAaaaaahhhh....\n");
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d..cc13d4c83291 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -727,7 +727,7 @@ void start_secondary(void *unused)
 
        local_irq_enable();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
        BUG();
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d..40a6b4f9c36c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
        set_cpu_online(smp_processor_id(), true);
        inc_irq_stat(CPU_RST);
        local_irq_enable();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /* Upping and downing of CPUs */
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be008fc01..13f633add29a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
        set_cpu_online(cpu, true);
        per_cpu(cpu_state, cpu) = CPU_ONLINE;
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 extern struct {
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81b20f0..fb30e7c6a5b1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
        local_irq_enable();
 
        wmb();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
        /* We should never reach here! */
        BUG();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d18672..8a6151a628ce 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -134,7 +134,7 @@ void smp_callin(void)
 
        local_irq_enable();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void cpu_panic(void)
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a98e171..6c0abaacec33 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -208,7 +208,7 @@ void online_secondary(void)
        /* Set up tile-timer clock-event device on this cpu */
        setup_tile_timer();
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f77b3c1..293b22a7ab02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -248,7 +248,7 @@ static void notrace start_secondary(void *unused)
        x86_cpuinit.setup_percpu_clockev();
 
        wmb();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_store_boot_cpu_info(void)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0261f2..3c6d17fd423a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
                xen_pvh_secondary_vcpu_init(cpu);
 #endif
        cpu_bringup();
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38514f5..fc4ad21a5ed4 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -157,7 +157,7 @@ void secondary_start_kernel(void)
 
        complete(&cpu_running);
 
-       cpu_startup_entry(CPUHP_ONLINE);
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void mx_cpu_start(void *p)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d2ca8c38f9c4..f9b1fab4388a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,6 +16,7 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
 
 struct device;
 struct device_node;
@@ -27,6 +28,9 @@ struct cpu {
        struct device dev;
 };
 
+extern void boot_cpu_init(void);
+extern void boot_cpu_state_init(void);
+
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
 extern bool cpu_is_hotpluggable(unsigned cpu);
@@ -74,7 +78,7 @@ enum {
        /* migration should happen before other stuff but after perf */
        CPU_PRI_PERF            = 20,
        CPU_PRI_MIGRATION       = 10,
-       CPU_PRI_SMPBOOT         = 9,
+
        /* bring up workqueues before normal notifiers and down after */
        CPU_PRI_WORKQUEUE_UP    = 5,
        CPU_PRI_WORKQUEUE_DOWN  = -5,
@@ -97,9 +101,7 @@ enum {
                                        * Called on the new cpu, just before
                                        * enabling interrupts. Must not sleep,
                                        * must not fail */
-#define CPU_DYING_IDLE         0x000B /* CPU (unsigned)v dying, reached
-                                       * idle loop. */
-#define CPU_BROKEN             0x000C /* CPU (unsigned)v did not die properly,
+#define CPU_BROKEN             0x000B /* CPU (unsigned)v did not die properly,
                                        * perhaps due to preemption. */
 
 /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
@@ -118,6 +120,7 @@ enum {
 
 
 #ifdef CONFIG_SMP
+extern bool cpuhp_tasks_frozen;
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
 #define cpu_notifier(fn, pri) {                                        \
@@ -167,7 +170,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb)
 }
 #endif
 
-void smpboot_thread_init(void);
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
@@ -177,6 +179,7 @@ extern void cpu_maps_update_done(void);
 #define cpu_notifier_register_done     cpu_maps_update_done
 
 #else  /* CONFIG_SMP */
+#define cpuhp_tasks_frozen     0
 
 #define cpu_notifier(fn, pri)  do { (void)(fn); } while (0)
 #define __cpu_notifier(fn, pri)        do { (void)(fn); } while (0)
@@ -215,10 +218,6 @@ static inline void cpu_notifier_register_done(void)
 {
 }
 
-static inline void smpboot_thread_init(void)
-{
-}
-
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -265,11 +264,6 @@ static inline int disable_nonboot_cpus(void) { return 0; }
 static inline void enable_nonboot_cpus(void) {}
 #endif /* !CONFIG_PM_SLEEP_SMP */
 
-enum cpuhp_state {
-       CPUHP_OFFLINE,
-       CPUHP_ONLINE,
-};
-
 void cpu_startup_entry(enum cpuhp_state state);
 
 void cpu_idle_poll_ctrl(bool enable);
@@ -280,14 +274,15 @@ void arch_cpu_idle_enter(void);
 void arch_cpu_idle_exit(void);
 void arch_cpu_idle_dead(void);
 
-DECLARE_PER_CPU(bool, cpu_dead_idle);
-
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
+void cpuhp_report_idle_dead(void);
+#else
+static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
new file mode 100644
index 000000000000..5d68e15e46b7
--- /dev/null
+++ b/include/linux/cpuhotplug.h
@@ -0,0 +1,93 @@
+#ifndef __CPUHOTPLUG_H
+#define __CPUHOTPLUG_H
+
+enum cpuhp_state {
+       CPUHP_OFFLINE,
+       CPUHP_CREATE_THREADS,
+       CPUHP_NOTIFY_PREPARE,
+       CPUHP_BRINGUP_CPU,
+       CPUHP_AP_IDLE_DEAD,
+       CPUHP_AP_OFFLINE,
+       CPUHP_AP_NOTIFY_STARTING,
+       CPUHP_AP_ONLINE,
+       CPUHP_TEARDOWN_CPU,
+       CPUHP_AP_ONLINE_IDLE,
+       CPUHP_AP_SMPBOOT_THREADS,
+       CPUHP_AP_NOTIFY_ONLINE,
+       CPUHP_AP_ONLINE_DYN,
+       CPUHP_AP_ONLINE_DYN_END         = CPUHP_AP_ONLINE_DYN + 30,
+       CPUHP_ONLINE,
+};
+
+int __cpuhp_setup_state(enum cpuhp_state state,        const char *name, bool 
invoke,
+                       int (*startup)(unsigned int cpu),
+                       int (*teardown)(unsigned int cpu));
+
+/**
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * @state:     The state for which the calls are installed
+ * @name:      Name of the callback (will be used in debug output)
+ * @startup:   startup callback function
+ * @teardown:  teardown callback function
+ *
+ * Installs the callback functions and invokes the startup callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline int cpuhp_setup_state(enum cpuhp_state state,
+                                   const char *name,
+                                   int (*startup)(unsigned int cpu),
+                                   int (*teardown)(unsigned int cpu))
+{
+       return __cpuhp_setup_state(state, name, true, startup, teardown);
+}
+
+/**
+ * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
+ *                            callbacks
+ * @state:     The state for which the calls are installed
+ * @name:      Name of the callback.
+ * @startup:   startup callback function
+ * @teardown:  teardown callback function
+ *
+ * Same as @cpuhp_setup_state except that no calls are invoked
+ * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ */
+static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
+                                           const char *name,
+                                           int (*startup)(unsigned int cpu),
+                                           int (*teardown)(unsigned int cpu))
+{
+       return __cpuhp_setup_state(state, name, false, startup, teardown);
+}
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+
+/**
+ * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
+ * @state:     The state for which the calls are removed
+ *
+ * Removes the callback functions and invokes the teardown callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline void cpuhp_remove_state(enum cpuhp_state state)
+{
+       __cpuhp_remove_state(state, true);
+}
+
+/**
+ * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
+ *                             teardown
+ * @state:     The state for which the calls are removed
+ */
+static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
+{
+       __cpuhp_remove_state(state, false);
+}
+
+#ifdef CONFIG_SMP
+void cpuhp_online_idle(enum cpuhp_state state);
+#else
+static inline void cpuhp_online_idle(enum cpuhp_state state) { }
+#endif
+
+#endif
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index d14a4c362465..4149868de4e6 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -47,6 +47,8 @@
  * runtime initialization.
  */
 
+struct notifier_block;
+
 typedef        int (*notifier_fn_t)(struct notifier_block *nb,
                        unsigned long action, void *data);
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47ee16f..fc46fe3ea259 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -332,9 +332,7 @@ void rcu_init(void);
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
-struct notifier_block;
-int rcu_cpu_notify(struct notifier_block *self,
-                  unsigned long action, void *hcpu);
+void rcu_report_dead(unsigned int cpu);
 
 #ifndef CONFIG_TINY_RCU
 void rcu_end_inkernel_boot(void);
diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
new file mode 100644
index 000000000000..a72bd93ec7e5
--- /dev/null
+++ b/include/trace/events/cpuhp.h
@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpuhp
+
+#if !defined(_TRACE_CPUHP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUHP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpuhp_enter,
+
+       TP_PROTO(unsigned int cpu,
+                int target,
+                int idx,
+                int (*fun)(unsigned int)),
+
+       TP_ARGS(cpu, target, idx, fun),
+
+       TP_STRUCT__entry(
+               __field( unsigned int,  cpu             )
+               __field( int,           target          )
+               __field( int,           idx             )
+               __field( void *,        fun             )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu    = cpu;
+               __entry->target = target;
+               __entry->idx    = idx;
+               __entry->fun    = fun;
+       ),
+
+       TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+                 __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
+TRACE_EVENT(cpuhp_exit,
+
+       TP_PROTO(unsigned int cpu,
+                int state,
+                int idx,
+                int ret),
+
+       TP_ARGS(cpu, state, idx, ret),
+
+       TP_STRUCT__entry(
+               __field( unsigned int,  cpu             )
+               __field( int,           state           )
+               __field( int,           idx             )
+               __field( int,           ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu    = cpu;
+               __entry->state  = state;
+               __entry->idx    = idx;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk(" cpu: %04u  state: %3d step: %3d ret: %d",
+                 __entry->cpu, __entry->state, __entry->idx,  __entry->ret)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/main.c b/init/main.c
index 58c9e374704b..55563fd36be3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -388,7 +388,6 @@ static noinline void __init_refok rest_init(void)
        int pid;
 
        rcu_scheduler_starting();
-       smpboot_thread_init();
        /*
         * We need to spawn init first so that it obtains pid 1, however
         * the init task will end up wanting to create kthreads, which, if
@@ -452,20 +451,6 @@ void __init parse_early_param(void)
        done = 1;
 }
 
-/*
- *     Activate the first processor.
- */
-
-static void __init boot_cpu_init(void)
-{
-       int cpu = smp_processor_id();
-       /* Mark the boot cpu "present", "online" etc for SMP and UP case */
-       set_cpu_online(cpu, true);
-       set_cpu_active(cpu, true);
-       set_cpu_present(cpu, true);
-       set_cpu_possible(cpu, true);
-}
-
 void __init __weak smp_setup_processor_id(void)
 {
 }
@@ -530,6 +515,7 @@ asmlinkage __visible void __init start_kernel(void)
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
+       boot_cpu_state_init();
        smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
 
        build_all_zonelists(NULL, NULL);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d39633ce9..6ea42e8da861 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -22,13 +22,88 @@
 #include <linux/lockdep.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/smpboot.h>
+
 #include <trace/events/power.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpuhp.h>
 
 #include "smpboot.h"
 
+/**
+ * cpuhp_cpu_state - Per cpu hotplug state storage
+ * @state:     The current cpu state
+ * @target:    The target state
+ * @thread:    Pointer to the hotplug thread
+ * @should_run:        Thread should execute
+ * @cb_state:  The state for a single callback (install/uninstall)
+ * @cb:                Single callback function (install/uninstall)
+ * @result:    Result of the operation
+ * @done:      Signal completion to the issuer of the task
+ */
+struct cpuhp_cpu_state {
+       enum cpuhp_state        state;
+       enum cpuhp_state        target;
+#ifdef CONFIG_SMP
+       struct task_struct      *thread;
+       bool                    should_run;
+       enum cpuhp_state        cb_state;
+       int                     (*cb)(unsigned int cpu);
+       int                     result;
+       struct completion       done;
+#endif
+};
+
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+
+/**
+ * cpuhp_step - Hotplug state machine step
+ * @name:      Name of the step
+ * @startup:   Startup function of the step
+ * @teardown:  Teardown function of the step
+ * @skip_onerr:        Do not invoke the functions on error rollback
+ *             Will go away once the notifiers are gone
+ * @cant_stop: Bringup/teardown can't be stopped at this step
+ */
+struct cpuhp_step {
+       const char      *name;
+       int             (*startup)(unsigned int cpu);
+       int             (*teardown)(unsigned int cpu);
+       bool            skip_onerr;
+       bool            cant_stop;
+};
+
+static DEFINE_MUTEX(cpuhp_state_mutex);
+static struct cpuhp_step cpuhp_bp_states[];
+static struct cpuhp_step cpuhp_ap_states[];
+
+/**
+ * cpuhp_invoke_callback - Invoke the callbacks for a given state
+ * @cpu:       The cpu for which the callback should be invoked
+ * @step:      The step in the state machine
+ * @cb:                The callback function to invoke
+ *
+ * Called from cpu hotplug and from the state register machinery
+ */
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
+                                int (*cb)(unsigned int))
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int ret = 0;
+
+       if (cb) {
+               trace_cpuhp_enter(cpu, st->target, step, cb);
+               ret = cb(cpu);
+               trace_cpuhp_exit(cpu, st->state, step, ret);
+       }
+       return ret;
+}
+
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
+bool cpuhp_tasks_frozen;
+EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 
 /*
  * The following two APIs (cpu_maps_update_begin/done) must be used when
@@ -207,31 +282,281 @@ int __register_cpu_notifier(struct notifier_block *nb)
        return raw_notifier_chain_register(&cpu_chain, nb);
 }
 
-static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
                        int *nr_calls)
 {
+       unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
+       void *hcpu = (void *)(long)cpu;
+
        int ret;
 
-       ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+       ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
                                        nr_calls);
 
        return notifier_to_errno(ret);
 }
 
-static int cpu_notify(unsigned long val, void *v)
+static int cpu_notify(unsigned long val, unsigned int cpu)
 {
-       return __cpu_notify(val, v, -1, NULL);
+       return __cpu_notify(val, cpu, -1, NULL);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+/* Notifier wrappers for transitioning to state machine */
+static int notify_prepare(unsigned int cpu)
+{
+       int nr_calls = 0;
+       int ret;
+
+       ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
+       if (ret) {
+               nr_calls--;
+               printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
+                               __func__, cpu);
+               __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
+       }
+       return ret;
+}
+
+static int notify_online(unsigned int cpu)
+{
+       cpu_notify(CPU_ONLINE, cpu);
+       return 0;
+}
+
+static int notify_starting(unsigned int cpu)
+{
+       cpu_notify(CPU_STARTING, cpu);
+       return 0;
+}
+
+static int bringup_wait_for_ap(unsigned int cpu)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+       wait_for_completion(&st->done);
+       return st->result;
+}
+
+static int bringup_cpu(unsigned int cpu)
+{
+       struct task_struct *idle = idle_thread_get(cpu);
+       int ret;
+
+       /* Arch-specific enabling code. */
+       ret = __cpu_up(cpu, idle);
+       if (ret) {
+               cpu_notify(CPU_UP_CANCELED, cpu);
+               return ret;
+       }
+       ret = bringup_wait_for_ap(cpu);
+       BUG_ON(!cpu_online(cpu));
+       return ret;
+}
+
+/*
+ * Hotplug state machine related functions
+ */
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
+                         struct cpuhp_step *steps)
+{
+       for (st->state++; st->state < st->target; st->state++) {
+               struct cpuhp_step *step = steps + st->state;
+
+               if (!step->skip_onerr)
+                       cpuhp_invoke_callback(cpu, st->state, step->startup);
+       }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+                               struct cpuhp_step *steps, enum cpuhp_state 
target)
+{
+       enum cpuhp_state prev_state = st->state;
+       int ret = 0;
+
+       for (; st->state > target; st->state--) {
+               struct cpuhp_step *step = steps + st->state;
+
+               ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+               if (ret) {
+                       st->target = prev_state;
+                       undo_cpu_down(cpu, st, steps);
+                       break;
+               }
+       }
+       return ret;
+}
+
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
+                       struct cpuhp_step *steps)
+{
+       for (st->state--; st->state > st->target; st->state--) {
+               struct cpuhp_step *step = steps + st->state;
+
+               if (!step->skip_onerr)
+                       cpuhp_invoke_callback(cpu, st->state, step->teardown);
+       }
+}
+
+static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+                             struct cpuhp_step *steps, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+       int ret = 0;
+
+       while (st->state < target) {
+               struct cpuhp_step *step;
+
+               st->state++;
+               step = steps + st->state;
+               ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+               if (ret) {
+                       st->target = prev_state;
+                       undo_cpu_up(cpu, st, steps);
+                       break;
+               }
+       }
+       return ret;
+}
+
+/*
+ * The cpu hotplug threads manage the bringup and teardown of the cpus
+ */
+static void cpuhp_create(unsigned int cpu)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+       init_completion(&st->done);
+}
+
+static int cpuhp_should_run(unsigned int cpu)
+{
+       struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+       return st->should_run;
+}
+
+/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
+static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+       enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
+
+       return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+}
+
+/* Execute the online startup callbacks. Used to be CPU_ONLINE */
+static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+       return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+}
+
+/*
+ * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
+ * callbacks when a state gets [un]installed at runtime.
+ */
+static void cpuhp_thread_fun(unsigned int cpu)
+{
+       struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+       int ret = 0;
+
+       /*
+        * Paired with the mb() in cpuhp_kick_ap_work and
+        * cpuhp_invoke_ap_callback, so the work set is consistently visible.
+        */
+       smp_mb();
+       if (!st->should_run)
+               return;
+
+       st->should_run = false;
+
+       /* Single callback invocation for [un]install ? */
+       if (st->cb) {
+               if (st->cb_state < CPUHP_AP_ONLINE) {
+                       local_irq_disable();
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+                       local_irq_enable();
+               } else {
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+               }
+       } else {
+               /* Cannot happen .... */
+               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+               /* Regular hotplug work */
+               if (st->state < st->target)
+                       ret = cpuhp_ap_online(cpu, st);
+               else if (st->state > st->target)
+                       ret = cpuhp_ap_offline(cpu, st);
+       }
+       st->result = ret;
+       complete(&st->done);
+}
 
-static void cpu_notify_nofail(unsigned long val, void *v)
+/* Invoke a single callback on a remote cpu */
+static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
+                                   int (*cb)(unsigned int))
 {
-       BUG_ON(cpu_notify(val, v));
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+       if (!cpu_online(cpu))
+               return 0;
+
+       st->cb_state = state;
+       st->cb = cb;
+       /*
+        * Make sure the above stores are visible before should_run becomes
+        * true. Paired with the mb() above in cpuhp_thread_fun()
+        */
+       smp_mb();
+       st->should_run = true;
+       wake_up_process(st->thread);
+       wait_for_completion(&st->done);
+       return st->result;
 }
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
+{
+       st->result = 0;
+       st->cb = NULL;
+       /*
+        * Make sure the above stores are visible before should_run becomes
+        * true. Paired with the mb() above in cpuhp_thread_fun()
+        */
+       smp_mb();
+       st->should_run = true;
+       wake_up_process(st->thread);
+}
+
+static int cpuhp_kick_ap_work(unsigned int cpu)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       enum cpuhp_state state = st->state;
+
+       trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+       __cpuhp_kick_ap_work(st);
+       wait_for_completion(&st->done);
+       trace_cpuhp_exit(cpu, st->state, state, st->result);
+       return st->result;
+}
+
+static struct smp_hotplug_thread cpuhp_threads = {
+       .store                  = &cpuhp_state.thread,
+       .create                 = &cpuhp_create,
+       .thread_should_run      = cpuhp_should_run,
+       .thread_fn              = cpuhp_thread_fun,
+       .thread_comm            = "cpuhp/%u",
+       .selfparking            = true,
+};
+
+void __init cpuhp_threads_init(void)
+{
+       BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
+       kthread_unpark(this_cpu_read(cpuhp_state.thread));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
 EXPORT_SYMBOL(register_cpu_notifier);
 EXPORT_SYMBOL(__register_cpu_notifier);
-
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
        cpu_maps_update_begin();
@@ -311,57 +636,60 @@ static inline void check_for_tasks(int dead_cpu)
        read_unlock(&tasklist_lock);
 }
 
-struct take_cpu_down_param {
-       unsigned long mod;
-       void *hcpu;
-};
+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+       BUG_ON(cpu_notify(val, cpu));
+}
+
+/*
+ * Run the CPU_DOWN_PREPARE notifier chain for @cpu.
+ *
+ * On failure the chain is run again with CPU_DOWN_FAILED, limited to
+ * nr_calls - 1 entries (presumably the notifiers which already saw
+ * CPU_DOWN_PREPARE successfully - confirm against __cpu_notify()), and a
+ * warning is printed.
+ *
+ * Returns the result of the CPU_DOWN_PREPARE run, 0 on success.
+ */
+static int notify_down_prepare(unsigned int cpu)
+{
+       int err, nr_calls = 0;
+
+       err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
+       if (err) {
+               nr_calls--;
+               __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
+               pr_warn("%s: attempt to take down CPU %u failed\n",
+                               __func__, cpu);
+       }
+       return err;
+}
+
+static int notify_dying(unsigned int cpu)
+{
+       cpu_notify(CPU_DYING, cpu);
+       return 0;
+}
 
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
-       struct take_cpu_down_param *param = _param;
-       int err;
+       struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+       enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
+       int err, cpu = smp_processor_id();
 
        /* Ensure this CPU doesn't handle any more interrupts. */
        err = __cpu_disable();
        if (err < 0)
                return err;
 
-       cpu_notify(CPU_DYING | param->mod, param->hcpu);
+       /* Invoke the former CPU_DYING callbacks */
+       for (; st->state > target; st->state--) {
+               struct cpuhp_step *step = cpuhp_ap_states + st->state;
+
+               cpuhp_invoke_callback(cpu, st->state, step->teardown);
+       }
        /* Give up timekeeping duties */
        tick_handover_do_timer();
        /* Park the stopper thread */
-       stop_machine_park((long)param->hcpu);
+       stop_machine_park(cpu);
        return 0;
 }
 
-/* Requires cpu_add_remove_lock to be held */
-static int _cpu_down(unsigned int cpu, int tasks_frozen)
+static int takedown_cpu(unsigned int cpu)
 {
-       int err, nr_calls = 0;
-       void *hcpu = (void *)(long)cpu;
-       unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
-       struct take_cpu_down_param tcd_param = {
-               .mod = mod,
-               .hcpu = hcpu,
-       };
-
-       if (num_online_cpus() == 1)
-               return -EBUSY;
-
-       if (!cpu_online(cpu))
-               return -EINVAL;
-
-       cpu_hotplug_begin();
-
-       err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
-       if (err) {
-               nr_calls--;
-               __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
-               pr_warn("%s: attempt to take down CPU %u failed\n",
-                       __func__, cpu);
-               goto out_release;
-       }
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int err;
 
        /*
         * By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -378,6 +706,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        else
                synchronize_rcu();
 
+       /* Park the smpboot threads */
+       kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
        smpboot_park_threads(cpu);
 
        /*
@@ -389,12 +719,12 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        /*
         * So now all preempt/rcu users must observe !cpu_active().
         */
-       err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+       err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
        if (err) {
                /* CPU didn't die: tell everyone.  Can't complain. */
-               cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+               cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
                irq_unlock_sparse();
-               goto out_release;
+               return err;
        }
        BUG_ON(cpu_online(cpu));
 
@@ -405,10 +735,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
         *
         * Wait for the stop thread to go away.
         */
-       while (!per_cpu(cpu_dead_idle, cpu))
-               cpu_relax();
-       smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
-       per_cpu(cpu_dead_idle, cpu) = false;
+       wait_for_completion(&st->done);
+       BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
        irq_unlock_sparse();
@@ -417,20 +745,104 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        /* This actually kills the CPU. */
        __cpu_die(cpu);
 
-       /* CPU is completely dead: tell everyone.  Too late to complain. */
        tick_cleanup_dead_cpu(cpu);
-       cpu_notify_nofail(CPU_DEAD | mod, hcpu);
+       return 0;
+}
 
+static int notify_dead(unsigned int cpu)
+{
+       cpu_notify_nofail(CPU_DEAD, cpu);
        check_for_tasks(cpu);
+       return 0;
+}
 
-out_release:
+static void cpuhp_complete_idle_dead(void *arg)
+{
+       struct cpuhp_cpu_state *st = arg;
+
+       complete(&st->done);
+}
+
+/*
+ * Operates on the hotplug state of the calling cpu, which must be in
+ * CPUHP_AP_OFFLINE (BUG otherwise). Reports the cpu as dead to RCU, moves
+ * the state to CPUHP_AP_IDLE_DEAD and asks the first online cpu to
+ * complete st->done on its behalf.
+ */
+void cpuhp_report_idle_dead(void)
+{
+       struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+       BUG_ON(st->state != CPUHP_AP_OFFLINE);
+       rcu_report_dead(smp_processor_id());
+       st->state = CPUHP_AP_IDLE_DEAD;
+       /*
+        * We cannot call complete after rcu_report_dead() so we delegate it
+        * to an online cpu.
+        */
+       smp_call_function_single(cpumask_first(cpu_online_mask),
+                                cpuhp_complete_idle_dead, st, 0);
+}
+
+#else
+#define notify_down_prepare    NULL
+#define takedown_cpu           NULL
+#define notify_dead            NULL
+#define notify_dying           NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+                          enum cpuhp_state target)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int prev_state, ret = 0;
+       bool hasdied = false;
+
+       if (num_online_cpus() == 1)
+               return -EBUSY;
+
+       if (!cpu_present(cpu))
+               return -EINVAL;
+
+       cpu_hotplug_begin();
+
+       cpuhp_tasks_frozen = tasks_frozen;
+
+       prev_state = st->state;
+       st->target = target;
+       /*
+        * If the current CPU state is in the range of the AP hotplug thread,
+        * then we need to kick the thread.
+        */
+       if (st->state > CPUHP_TEARDOWN_CPU) {
+               ret = cpuhp_kick_ap_work(cpu);
+               /*
+                * The AP side has done the error rollback already. Just
+                * return the error code..
+                */
+               if (ret)
+                       goto out;
+
+               /*
+                * We might have stopped still in the range of the AP hotplug
+                * thread. Nothing to do anymore.
+                */
+               if (st->state > CPUHP_TEARDOWN_CPU)
+                       goto out;
+       }
+       /*
+        * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
+        * to do the further cleanups.
+        */
+       ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+
+       hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
+out:
        cpu_hotplug_done();
-       if (!err)
-               cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
-       return err;
+       /* This post dead nonsense must die */
+       if (!ret && hasdied)
+               cpu_notify_nofail(CPU_POST_DEAD, cpu);
+       return ret;
 }
 
-int cpu_down(unsigned int cpu)
+static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
 {
        int err;
 
@@ -441,100 +853,131 @@ int cpu_down(unsigned int cpu)
                goto out;
        }
 
-       err = _cpu_down(cpu, 0);
+       err = _cpu_down(cpu, 0, target);
 
 out:
        cpu_maps_update_done();
        return err;
 }
+int cpu_down(unsigned int cpu)
+{
+       return do_cpu_down(cpu, CPUHP_OFFLINE);
+}
 EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-/*
- * Unpark per-CPU smpboot kthreads at CPU-online time.
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
  */
-static int smpboot_thread_call(struct notifier_block *nfb,
-                              unsigned long action, void *hcpu)
+void notify_cpu_starting(unsigned int cpu)
 {
-       int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               smpboot_unpark_threads(cpu);
-               break;
+       while (st->state < target) {
+               struct cpuhp_step *step;
 
-       default:
-               break;
+               st->state++;
+               step = cpuhp_ap_states + st->state;
+               cpuhp_invoke_callback(cpu, st->state, step->startup);
        }
-
-       return NOTIFY_OK;
 }
 
-static struct notifier_block smpboot_thread_notifier = {
-       .notifier_call = smpboot_thread_call,
-       .priority = CPU_PRI_SMPBOOT,
-};
-
-void smpboot_thread_init(void)
+/*
+ * Called from the idle task. We need to set active here, so we can kick off
+ * the stopper thread and unpark the smpboot threads. If the target state is
+ * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
+ * cpu further.
+ */
+void cpuhp_online_idle(enum cpuhp_state state)
 {
-       register_cpu_notifier(&smpboot_thread_notifier);
+       struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+       unsigned int cpu = smp_processor_id();
+
+       /* Happens for the boot cpu */
+       if (state != CPUHP_AP_ONLINE_IDLE)
+               return;
+
+       st->state = CPUHP_AP_ONLINE_IDLE;
+
+       /* The cpu is marked online, set it active now */
+       set_cpu_active(cpu, true);
+       /* Unpark the stopper thread and the hotplug thread of this cpu */
+       stop_machine_unpark(cpu);
+       kthread_unpark(st->thread);
+
+       /* Should we go further up ? */
+       if (st->target > CPUHP_AP_ONLINE_IDLE)
+               __cpuhp_kick_ap_work(st);
+       else
+               complete(&st->done);
 }
 
 /* Requires cpu_add_remove_lock to be held */
-static int _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 {
-       int ret, nr_calls = 0;
-       void *hcpu = (void *)(long)cpu;
-       unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        struct task_struct *idle;
+       int ret = 0;
 
        cpu_hotplug_begin();
 
-       if (cpu_online(cpu) || !cpu_present(cpu)) {
+       if (!cpu_present(cpu)) {
                ret = -EINVAL;
                goto out;
        }
 
-       idle = idle_thread_get(cpu);
-       if (IS_ERR(idle)) {
-               ret = PTR_ERR(idle);
-               goto out;
-       }
-
-       ret = smpboot_create_threads(cpu);
-       if (ret)
+       /*
+        * The caller of do_cpu_up might have raced with another
+        * caller. Ignore it for now.
+        */
+       if (st->state >= target)
                goto out;
 
-       ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
-       if (ret) {
-               nr_calls--;
-               pr_warn("%s: attempt to bring up CPU %u failed\n",
-                       __func__, cpu);
-               goto out_notify;
+       if (st->state == CPUHP_OFFLINE) {
+               /* Let it fail before we try to bring the cpu up */
+               idle = idle_thread_get(cpu);
+               if (IS_ERR(idle)) {
+                       ret = PTR_ERR(idle);
+                       goto out;
+               }
        }
 
-       /* Arch-specific enabling code. */
-       ret = __cpu_up(cpu, idle);
-
-       if (ret != 0)
-               goto out_notify;
-       BUG_ON(!cpu_online(cpu));
+       cpuhp_tasks_frozen = tasks_frozen;
 
-       /* Now call notifier in preparation. */
-       cpu_notify(CPU_ONLINE | mod, hcpu);
+       st->target = target;
+       /*
+        * If the current CPU state is in the range of the AP hotplug thread,
+        * then we need to kick the thread once more.
+        */
+       if (st->state > CPUHP_BRINGUP_CPU) {
+               ret = cpuhp_kick_ap_work(cpu);
+               /*
+                * The AP side has done the error rollback already. Just
+                * return the error code..
+                */
+               if (ret)
+                       goto out;
+       }
 
-out_notify:
-       if (ret != 0)
-               __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+       /*
+        * Try to reach the target state. We max out on the BP at
+        * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
+        * responsible for bringing it up to the target state.
+        */
+       target = min((int)target, CPUHP_BRINGUP_CPU);
+       ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
 out:
        cpu_hotplug_done();
-
        return ret;
 }
 
-int cpu_up(unsigned int cpu)
+static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 {
        int err = 0;
 
@@ -558,12 +1001,16 @@ int cpu_up(unsigned int cpu)
                goto out;
        }
 
-       err = _cpu_up(cpu, 0);
-
+       err = _cpu_up(cpu, 0, target);
 out:
        cpu_maps_update_done();
        return err;
 }
+
+int cpu_up(unsigned int cpu)
+{
+       return do_cpu_up(cpu, CPUHP_ONLINE);
+}
 EXPORT_SYMBOL_GPL(cpu_up);
 
 #ifdef CONFIG_PM_SLEEP_SMP
@@ -586,7 +1033,7 @@ int disable_nonboot_cpus(void)
                if (cpu == first_cpu)
                        continue;
                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
-               error = _cpu_down(cpu, 1);
+               error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
                trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
                if (!error)
                        cpumask_set_cpu(cpu, frozen_cpus);
@@ -636,7 +1083,7 @@ void enable_nonboot_cpus(void)
 
        for_each_cpu(cpu, frozen_cpus) {
                trace_suspend_resume(TPS("CPU_ON"), cpu, true);
-               error = _cpu_up(cpu, 1);
+               error = _cpu_up(cpu, 1, CPUHP_ONLINE);
                trace_suspend_resume(TPS("CPU_ON"), cpu, false);
                if (!error) {
                        pr_info("CPU%d is up\n", cpu);
@@ -709,26 +1156,463 @@ core_initcall(cpu_hotplug_pm_sync_init);
 
 #endif /* CONFIG_PM_SLEEP_SMP */
 
+#endif /* CONFIG_SMP */
+
+/* Boot processor state steps */
+static struct cpuhp_step cpuhp_bp_states[] = {
+       [CPUHP_OFFLINE] = {
+               .name                   = "offline",
+               .startup                = NULL,
+               .teardown               = NULL,
+       },
+#ifdef CONFIG_SMP
+       [CPUHP_CREATE_THREADS]= {
+               .name                   = "threads:create",
+               .startup                = smpboot_create_threads,
+               .teardown               = NULL,
+               .cant_stop              = true,
+       },
+       /*
+        * Preparatory and dead notifiers. Will be replaced once the notifiers
+        * are converted to states.
+        */
+       [CPUHP_NOTIFY_PREPARE] = {
+               .name                   = "notify:prepare",
+               .startup                = notify_prepare,
+               .teardown               = notify_dead,
+               .skip_onerr             = true,
+               .cant_stop              = true,
+       },
+       /* Kicks the plugged cpu into life */
+       [CPUHP_BRINGUP_CPU] = {
+               .name                   = "cpu:bringup",
+               .startup                = bringup_cpu,
+               .teardown               = NULL,
+               .cant_stop              = true,
+       },
+       /*
+        * Handled on control processor until the plugged processor manages
+        * this itself.
+        */
+       [CPUHP_TEARDOWN_CPU] = {
+               .name                   = "cpu:teardown",
+               .startup                = NULL,
+               .teardown               = takedown_cpu,
+               .cant_stop              = true,
+       },
+#endif
+};
+
+/* Application processor state steps */
+static struct cpuhp_step cpuhp_ap_states[] = {
+#ifdef CONFIG_SMP
+       /* Final state before CPU kills itself */
+       [CPUHP_AP_IDLE_DEAD] = {
+               .name                   = "idle:dead",
+       },
+       /*
+        * Last state before CPU enters the idle loop to die. Transient state
+        * for synchronization.
+        */
+       [CPUHP_AP_OFFLINE] = {
+               .name                   = "ap:offline",
+               .cant_stop              = true,
+       },
+       /*
+        * Low level startup/teardown notifiers. Run with interrupts
+        * disabled. Will be removed once the notifiers are converted to
+        * states.
+        */
+       [CPUHP_AP_NOTIFY_STARTING] = {
+               .name                   = "notify:starting",
+               .startup                = notify_starting,
+               .teardown               = notify_dying,
+               .skip_onerr             = true,
+               .cant_stop              = true,
+       },
+       /* Entry state on starting. Interrupts enabled from here on. Transient
+        * state for synchronization */
+       [CPUHP_AP_ONLINE] = {
+               .name                   = "ap:online",
+       },
+       /* Handle smpboot threads park/unpark */
+       [CPUHP_AP_SMPBOOT_THREADS] = {
+               .name                   = "smpboot:threads",
+               .startup                = smpboot_unpark_threads,
+               .teardown               = NULL,
+       },
+       /*
+        * Online/down_prepare notifiers. Will be removed once the notifiers
+        * are converted to states.
+        */
+       [CPUHP_AP_NOTIFY_ONLINE] = {
+               .name                   = "notify:online",
+               .startup                = notify_online,
+               .teardown               = notify_down_prepare,
+       },
+#endif
+       /*
+        * The dynamically registered state space is here
+        */
+
+       /* CPU is fully up and running. */
+       [CPUHP_ONLINE] = {
+               .name                   = "online",
+               .startup                = NULL,
+               .teardown               = NULL,
+       },
+};
+
+/* Sanity check for callbacks */
+static int cpuhp_cb_check(enum cpuhp_state state)
+{
+       if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
+               return -EINVAL;
+       return 0;
+}
+
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+       /*
+        * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+        * purposes as that state is handled explicitly in cpu_down.
+        */
+       return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+/*
+ * Look up the step descriptor for @state. States for which
+ * cpuhp_is_ap_state() is true are taken from cpuhp_ap_states[], all others
+ * from cpuhp_bp_states[]; both tables are indexed by the state value
+ * itself. No range check is done here, the caller must pass a valid state.
+ */
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+       struct cpuhp_step *sp;
+
+       sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+       return sp + state;
+}
+
+static void cpuhp_store_callbacks(enum cpuhp_state state,
+                                 const char *name,
+                                 int (*startup)(unsigned int cpu),
+                                 int (*teardown)(unsigned int cpu))
+{
+       /* (Un)Install the callbacks for further cpu hotplug operations */
+       struct cpuhp_step *sp;
+
+       mutex_lock(&cpuhp_state_mutex);
+       sp = cpuhp_get_step(state);
+       sp->startup = startup;
+       sp->teardown = teardown;
+       sp->name = name;
+       mutex_unlock(&cpuhp_state_mutex);
+}
+
+static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
+{
+       return cpuhp_get_step(state)->teardown;
+}
+
+/*
+ * Call the startup/teardown function for a step either on the AP or
+ * on the current CPU.
+ */
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
+                           int (*cb)(unsigned int), bool bringup)
+{
+       int ret;
+
+       if (!cb)
+               return 0;
+       /*
+        * The non AP bound callbacks can fail on bringup. On teardown
+        * e.g. module removal we crash for now.
+        */
+#ifdef CONFIG_SMP
+       if (cpuhp_is_ap_state(state))
+               ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+       else
+               ret = cpuhp_invoke_callback(cpu, state, cb);
+#else
+       ret = cpuhp_invoke_callback(cpu, state, cb);
+#endif
+       BUG_ON(ret && !bringup);
+       return ret;
+}
+
+/*
+ * Called from __cpuhp_setup_state on a recoverable failure.
+ *
+ * Note: The teardown callbacks for rollback are not allowed to fail!
+ */
+static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
+                                  int (*teardown)(unsigned int cpu))
+{
+       int cpu;
+
+       if (!teardown)
+               return;
+
+       /* Roll back the already executed steps on the other cpus */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+
+               if (cpu >= failedcpu)
+                       break;
+
+               /* Did we invoke the startup call on that cpu ? */
+               if (cpustate >= state)
+                       cpuhp_issue_call(cpu, state, teardown, false);
+       }
+}
+
+/*
+ * Returns a free slot for dynamic assignment of the Online state. The states
+ * are protected by the cpuhp_state_mutex and an empty slot is identified
+ * by having no name assigned.
+ */
+static int cpuhp_reserve_state(enum cpuhp_state state)
+{
+       enum cpuhp_state i;
+
+       mutex_lock(&cpuhp_state_mutex);
+       for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
+               if (cpuhp_ap_states[i].name)
+                       continue;
+
+               cpuhp_ap_states[i].name = "Reserved";
+               mutex_unlock(&cpuhp_state_mutex);
+               return i;
+       }
+       mutex_unlock(&cpuhp_state_mutex);
+       WARN(1, "No more dynamic states available for CPU hotplug\n");
+       return -ENOSPC;
+}
+
 /**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
- * @cpu: cpu that just started
+ * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * @state:     The state to setup
+ * @name:      Name of the state, must not be NULL
+ * @invoke:    If true, the startup function is invoked for cpus where
+ *             cpu state >= @state
+ * @startup:   startup callback function
+ * @teardown:  teardown callback function
  *
- * This function calls the cpu_chain notifiers with CPU_STARTING.
- * It must be called by the arch code on the new cpu, before the new cpu
- * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ * Returns 0 if successful, otherwise a proper error code
  */
-void notify_cpu_starting(unsigned int cpu)
+int __cpuhp_setup_state(enum cpuhp_state state,
+                       const char *name, bool invoke,
+                       int (*startup)(unsigned int cpu),
+                       int (*teardown)(unsigned int cpu))
 {
-       unsigned long val = CPU_STARTING;
+       int cpu, ret = 0;
+       int dyn_state = 0;
 
-#ifdef CONFIG_PM_SLEEP_SMP
-       if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
-               val = CPU_STARTING_FROZEN;
-#endif /* CONFIG_PM_SLEEP_SMP */
-       cpu_notify(val, (void *)(long)cpu);
+       if (cpuhp_cb_check(state) || !name)
+               return -EINVAL;
+
+       get_online_cpus();
+
+       /* currently assignments for the ONLINE state are possible */
+       if (state == CPUHP_AP_ONLINE_DYN) {
+               dyn_state = 1;
+               ret = cpuhp_reserve_state(state);
+               if (ret < 0)
+                       goto out;
+               state = ret;
+       }
+
+       cpuhp_store_callbacks(state, name, startup, teardown);
+
+       if (!invoke || !startup)
+               goto out;
+
+       /*
+        * Try to call the startup callback for each present cpu
+        * depending on the hotplug state of the cpu.
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+
+               if (cpustate < state)
+                       continue;
+
+               ret = cpuhp_issue_call(cpu, state, startup, true);
+               if (ret) {
+                       cpuhp_rollback_install(cpu, state, teardown);
+                       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+                       goto out;
+               }
+       }
+out:
+       put_online_cpus();
+       if (!ret && dyn_state)
+               return state;
+       return ret;
 }
+EXPORT_SYMBOL(__cpuhp_setup_state);
 
-#endif /* CONFIG_SMP */
+/**
+ * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * @state:     The state to remove
+ * @invoke:    If true, the teardown function is invoked for cpus where
+ *             cpu state >= @state
+ *
+ * The teardown callback is currently not allowed to fail. Think
+ * about module removal!
+ *
+ * An invalid @state triggers BUG() before any step table is accessed.
+ */
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+       int (*teardown)(unsigned int cpu);
+       int cpu;
+
+       /* Validate @state before it is used to index the step tables */
+       BUG_ON(cpuhp_cb_check(state));
+       teardown = cpuhp_get_teardown_cb(state);
+
+       get_online_cpus();
+
+       /* Nothing to call: just clear the installed callbacks */
+       if (!invoke || !teardown)
+               goto remove;
+
+       /*
+        * Call the teardown callback for each present cpu depending
+        * on the hotplug state of the cpu. This function is not
+        * allowed to fail currently!
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+
+               if (cpustate >= state)
+                       cpuhp_issue_call(cpu, state, teardown, false);
+       }
+remove:
+       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+       put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state);
+
+#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
+static ssize_t show_cpuhp_state(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+       return sprintf(buf, "%d\n", st->state);
+}
+static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+
+/*
+ * sysfs store handler of the per cpu "target" attribute.
+ *
+ * Parses a decimal state number from @buf and, if the state is acceptable,
+ * moves the cpu dev->id up or down towards it via do_cpu_up() or
+ * do_cpu_down(). A state is rejected with -EINVAL when it is out of range
+ * (anything but CPUHP_OFFLINE/CPUHP_ONLINE unless
+ * CONFIG_CPU_HOTPLUG_STATE_CONTROL is set), has no name installed or is
+ * marked cant_stop.
+ *
+ * Returns @count on success, otherwise a negative error code.
+ */
+static ssize_t write_cpuhp_target(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+       struct cpuhp_step *sp;
+       int target, ret;
+
+       ret = kstrtoint(buf, 10, &target);
+       if (ret)
+               return ret;
+
+#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
+       if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
+               return -EINVAL;
+#else
+       if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
+               return -EINVAL;
+#endif
+
+       ret = lock_device_hotplug_sysfs();
+       if (ret)
+               return ret;
+
+       mutex_lock(&cpuhp_state_mutex);
+       sp = cpuhp_get_step(target);
+       ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
+       mutex_unlock(&cpuhp_state_mutex);
+       /* The device hotplug lock is held, so drop it on the error path too */
+       if (ret)
+               goto out;
+
+       if (st->state < target)
+               ret = do_cpu_up(dev->id, target);
+       else
+               ret = do_cpu_down(dev->id, target);
+out:
+       unlock_device_hotplug();
+       return ret ? ret : count;
+}
+
+static ssize_t show_cpuhp_target(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+       return sprintf(buf, "%d\n", st->target);
+}
+static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+
+static struct attribute *cpuhp_cpu_attrs[] = {
+       &dev_attr_state.attr,
+       &dev_attr_target.attr,
+       NULL
+};
+
+static struct attribute_group cpuhp_cpu_attr_group = {
+       .attrs = cpuhp_cpu_attrs,
+       .name = "hotplug",
+       NULL
+};
+
+static ssize_t show_cpuhp_states(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       ssize_t cur, res = 0;
+       int i;
+
+       mutex_lock(&cpuhp_state_mutex);
+       for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
+               struct cpuhp_step *sp = cpuhp_get_step(i);
+
+               if (sp->name) {
+                       cur = sprintf(buf, "%3d: %s\n", i, sp->name);
+                       buf += cur;
+                       res += cur;
+               }
+       }
+       mutex_unlock(&cpuhp_state_mutex);
+       return res;
+}
+static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+
+static struct attribute *cpuhp_cpu_root_attrs[] = {
+       &dev_attr_states.attr,
+       NULL
+};
+
+static struct attribute_group cpuhp_cpu_root_attr_group = {
+       .attrs = cpuhp_cpu_root_attrs,
+       .name = "hotplug",
+       NULL
+};
+
+/*
+ * Create the "hotplug" attribute group on the cpu subsystem root device
+ * and on the device of every possible cpu which has one. Stops at the
+ * first failure and returns its error code; groups created before the
+ * failure are left in place. Returns 0 when all groups were created.
+ */
+static int __init cpuhp_sysfs_init(void)
+{
+       int cpu, ret;
+
+       ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+                                &cpuhp_cpu_root_attr_group);
+       if (ret)
+               return ret;
+
+       for_each_possible_cpu(cpu) {
+               struct device *dev = get_cpu_device(cpu);
+
+               if (!dev)
+                       continue;
+               ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+device_initcall(cpuhp_sysfs_init);
+#endif
 
 /*
  * cpu_bit_bitmap[] is a special, "compressed" data structure that
@@ -789,3 +1673,25 @@ void init_cpu_online(const struct cpumask *src)
 {
        cpumask_copy(&__cpu_online_mask, src);
 }
+
+/*
+ * Activate the first processor.
+ */
+void __init boot_cpu_init(void)
+{
+       int cpu = smp_processor_id();
+
+       /* Mark the boot cpu "present", "online" etc for SMP and UP case */
+       set_cpu_online(cpu, true);
+       set_cpu_active(cpu, true);
+       set_cpu_present(cpu, true);
+       set_cpu_possible(cpu, true);
+}
+
+/*
+ * Record the calling cpu as CPUHP_ONLINE in its per-cpu hotplug state.
+ *
+ * Must be called _AFTER_ setting up the per_cpu areas
+ */
+void __init boot_cpu_state_init(void)
+{
+       per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e41dd4131f7a..85b41341272e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2607,28 +2607,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 }
 
 /*
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function.  We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
- */
-static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
-{
-       unsigned long flags;
-       unsigned long mask;
-       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-       struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
-
-       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
-               return;
-
-       /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
-       mask = rdp->grpmask;
-       raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
-       rnp->qsmaskinitnext &= ~mask;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context.  Do the remainder of the cleanup,
  * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -4247,6 +4225,43 @@ static void rcu_prepare_cpu(int cpu)
                rcu_init_percpu_data(cpu, rsp);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function.  We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ */
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+       unsigned long flags;
+       unsigned long mask;
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
+
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+               return;
+
+       /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+       mask = rdp->grpmask;
+       raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+       rnp->qsmaskinitnext &= ~mask;
+       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+void rcu_report_dead(unsigned int cpu)
+{
+       struct rcu_state *rsp;
+
+       /* QS for any half-done expedited RCU-sched GP. */
+       preempt_disable();
+       rcu_report_exp_rdp(&rcu_sched_state,
+                          this_cpu_ptr(rcu_sched_state.rda), true);
+       preempt_enable();
+       for_each_rcu_flavor(rsp)
+               rcu_cleanup_dying_idle_cpu(cpu, rsp);
+}
+#endif
+
 /*
  * Handle CPU online/offline notification events.
  */
@@ -4278,17 +4293,6 @@ int rcu_cpu_notify(struct notifier_block *self,
                for_each_rcu_flavor(rsp)
                        rcu_cleanup_dying_cpu(rsp);
                break;
-       case CPU_DYING_IDLE:
-               /* QS for any half-done expedited RCU-sched GP. */
-               preempt_disable();
-               rcu_report_exp_rdp(&rcu_sched_state,
-                                  this_cpu_ptr(rcu_sched_state.rda), true);
-               preempt_enable();
-
-               for_each_rcu_flavor(rsp) {
-                       rcu_cleanup_dying_idle_cpu(cpu, rsp);
-               }
-               break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
        case CPU_UP_CANCELED:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9503d590e5ef..626646396ca0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5692,16 +5692,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
                set_cpu_rq_start_time();
                return NOTIFY_OK;
 
-       case CPU_ONLINE:
-               /*
-                * At this point a starting CPU has marked itself as online via
-                * set_cpu_online(). But it might not yet have marked itself
-                * as active, which is essential from here on.
-                */
-               set_cpu_active(cpu, true);
-               stop_machine_unpark(cpu);
-               return NOTIFY_OK;
-
        case CPU_DOWN_FAILED:
                set_cpu_active(cpu, true);
                return NOTIFY_OK;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 544a7133cbd1..bd12c6c714ec 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
+#include <linux/cpuhotplug.h>
 #include <linux/tick.h>
 #include <linux/mm.h>
 #include <linux/stackprotector.h>
@@ -193,8 +194,6 @@ exit_idle:
        rcu_idle_exit();
 }
 
-DEFINE_PER_CPU(bool, cpu_dead_idle);
-
 /*
  * Generic idle loop implementation
  *
@@ -221,10 +220,7 @@ static void cpu_idle_loop(void)
                        rmb();
 
                        if (cpu_is_offline(smp_processor_id())) {
-                               rcu_cpu_notify(NULL, CPU_DYING_IDLE,
-                                              (void *)(long)smp_processor_id());
-                               smp_mb(); /* all activity before dead. */
-                               this_cpu_write(cpu_dead_idle, true);
+                               cpuhp_report_idle_dead();
                                arch_cpu_idle_dead();
                        }
 
@@ -291,5 +287,6 @@ void cpu_startup_entry(enum cpuhp_state state)
        boot_init_stack_canary();
 #endif
        arch_cpu_idle_prepare();
+       cpuhp_online_idle(state);
        cpu_idle_loop();
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index d903c02223af..822ffb1ada3f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -569,6 +569,7 @@ void __init smp_init(void)
        unsigned int cpu;
 
        idle_threads_init();
+       cpuhp_threads_init();
 
        /* FIXME: This should be done in userspace --RR */
        for_each_present_cpu(cpu) {
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d264f59bff56..13bc43d1fb22 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -226,7 +226,7 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
                kthread_unpark(tsk);
 }
 
-void smpboot_unpark_threads(unsigned int cpu)
+int smpboot_unpark_threads(unsigned int cpu)
 {
        struct smp_hotplug_thread *cur;
 
@@ -235,6 +235,7 @@ void smpboot_unpark_threads(unsigned int cpu)
                if (cpumask_test_cpu(cpu, cur->cpumask))
                        smpboot_unpark_thread(cur, cpu);
        mutex_unlock(&smpboot_threads_lock);
+       return 0;
 }
 
 static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
@@ -245,7 +246,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
                kthread_park(tsk);
 }
 
-void smpboot_park_threads(unsigned int cpu)
+int smpboot_park_threads(unsigned int cpu)
 {
        struct smp_hotplug_thread *cur;
 
@@ -253,6 +254,7 @@ void smpboot_park_threads(unsigned int cpu)
        list_for_each_entry_reverse(cur, &hotplug_threads, list)
                smpboot_park_thread(cur, cpu);
        mutex_unlock(&smpboot_threads_lock);
+       return 0;
 }
 
 static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 72415a0eb955..485b81cfab34 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -14,7 +14,9 @@ static inline void idle_threads_init(void) { }
 #endif
 
 int smpboot_create_threads(unsigned int cpu);
-void smpboot_park_threads(unsigned int cpu);
-void smpboot_unpark_threads(unsigned int cpu);
+int smpboot_park_threads(unsigned int cpu);
+int smpboot_unpark_threads(unsigned int cpu);
+
+void __init cpuhp_threads_init(void);
 
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8bfd1aca7a3d..f28f7fad452f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1442,6 +1442,19 @@ config DEBUG_BLOCK_EXT_DEVT
 
          Say N if you are unsure.
 
+config CPU_HOTPLUG_STATE_CONTROL
+       bool "Enable CPU hotplug state control"
+       depends on DEBUG_KERNEL
+       depends on HOTPLUG_CPU
+       default n
+       help
+         Allows writing intermediate steps between "offline" and "online" to
+         the CPU's sysfs target file, so states can be stepped through at a
+         fine granularity. This is a debug option for now as the hotplug
+         machinery cannot be stopped and restarted at arbitrary points yet.
+
+         Say N if you are unsure.
+
 config NOTIFIER_ERROR_INJECTION
        tristate "Notifier error injection"
        depends on DEBUG_KERNEL

Reply via email to