From: Vincent Donnefort <vincent.donnef...@arm.com>

The atomic states (between CPUHP_AP_IDLE_DEAD and CPUHP_AP_ONLINE) are
triggered by the CPUHP_BRINGUP_CPU step. If the latter fails, no atomic
state can be rolled back.

DEAD callbacks cannot fail either, and they do not allow recovery. As a
consequence, during hotunplug, the fail injection interface should prohibit
all states up to and including CPUHP_BRINGUP_CPU (i.e. from
CPUHP_BRINGUP_CPU down to CPUHP_OFFLINE).

Signed-off-by: Vincent Donnefort <vincent.donnef...@arm.com>

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 093f96fb0824..d44877095b8c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1038,9 +1038,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
         * to do the further cleanups.
         */
        ret = cpuhp_down_callbacks(cpu, st, target);
-       if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-               cpuhp_reset_state(st, prev_state);
-               __cpuhp_kick_ap(st);
+       if (ret && st->state < prev_state) {
+               if (st->state == CPUHP_TEARDOWN_CPU) {
+                       cpuhp_reset_state(st, prev_state);
+                       __cpuhp_kick_ap(st);
+               } else {
+                       WARN(1, "DEAD callback error for CPU%d", cpu);
+               }
        }
 
 out:
@@ -2214,6 +2218,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
        if (cpuhp_is_atomic_state(fail))
                return -EINVAL;
 
+       /*
+        * DEAD callbacks cannot fail...
+        * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
+        * triggering STARTING callbacks, a failure in this state would
+        * hinder rollback.
+        */
+       if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
+               return -EINVAL;
+
        /*
         * Cannot fail anything that doesn't have callbacks.
         */
-- 
2.25.1

Reply via email to