Once stop_machine() is gone from the CPU offline path, we won't be able to depend on disabling preemption to prevent CPUs from going offline from under us.
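As a rough sketch of the reader-side conversion this series asks for
(do_per_cpu_work() is a made-up placeholder, not a function in the tree):

	/* Old pattern: safe only while stop_machine() backs CPU offline */
	preempt_disable();
	for_each_online_cpu(cpu)
		do_per_cpu_work(cpu);	/* hypothetical per-cpu work */
	preempt_enable();

	/* New pattern: explicitly synchronize against CPU offline */
	get_online_cpus_atomic();
	for_each_online_cpu(cpu)
		do_per_cpu_work(cpu);
	put_online_cpus_atomic();

These calls are intended to be safe in atomic context, which is exactly
what the load balancing paths touched below require.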
Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going
offline when invoked from atomic context.

Cc: Ingo Molnar <mi...@redhat.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com>
---

 kernel/sched/fair.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f77f9c5..62d98dd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3369,7 +3369,8 @@ done:
  *
  * Returns the target CPU number, or the same CPU if no balancing is needed.
  *
- * preempt must be disabled.
+ * Must be called within get/put_online_cpus_atomic(), to prevent CPUs
+ * from going offline from under us.
  */
 static int
 select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
@@ -5289,6 +5290,8 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 	raw_spin_unlock(&this_rq->lock);
 
 	update_blocked_averages(this_cpu);
+
+	get_online_cpus_atomic();
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -5312,6 +5315,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 		}
 	}
 	rcu_read_unlock();
+	put_online_cpus_atomic();
 
 	raw_spin_lock(&this_rq->lock);
 
@@ -5338,6 +5342,7 @@ static int active_load_balance_cpu_stop(void *data)
 	struct rq *target_rq = cpu_rq(target_cpu);
 	struct sched_domain *sd;
 
+	get_online_cpus_atomic();
 	raw_spin_lock_irq(&busiest_rq->lock);
 
 	/* make sure the requested cpu hasn't gone down in the meantime */
@@ -5389,6 +5394,7 @@ static int active_load_balance_cpu_stop(void *data)
 out_unlock:
 	busiest_rq->active_balance = 0;
 	raw_spin_unlock_irq(&busiest_rq->lock);
+	put_online_cpus_atomic();
 	return 0;
 }
 
@@ -5549,6 +5555,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	update_blocked_averages(cpu);
 
+	get_online_cpus_atomic();
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -5597,6 +5604,7 @@ out:
 			break;
 	}
 	rcu_read_unlock();
+	put_online_cpus_atomic();
 
 	/*
 	 * next_balance will be updated only when there is a need.
@@ -5728,6 +5736,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 	enum cpu_idle_type idle = this_rq->idle_balance ?
 						CPU_IDLE : CPU_NOT_IDLE;
 
+	get_online_cpus_atomic();
 	rebalance_domains(this_cpu, idle);
 
 	/*
@@ -5736,6 +5745,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 	 * stopped.
 	 */
 	nohz_idle_balance(this_cpu, idle);
+	put_online_cpus_atomic();
 }
 
 static inline int on_null_domain(int cpu)
@@ -5753,8 +5763,10 @@ void trigger_load_balance(struct rq *rq, int cpu)
 	    likely(!on_null_domain(cpu)))
 		raise_softirq(SCHED_SOFTIRQ);
 #ifdef CONFIG_NO_HZ_COMMON
+	get_online_cpus_atomic();
 	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
 		nohz_balancer_kick(cpu);
+	put_online_cpus_atomic();
 #endif
 }