From: Thomas Gleixner <t...@linutronix.de>

RT kernels need to ensure that all tasks which are not per CPU kthreads
have left the outgoing CPU to guarantee that no tasks are force-migrated
within a migrate-disabled section.

There is also some desire to (ab)use fine-grained CPU hotplug control to
clear a CPU from active state to force-migrate tasks which are not per CPU
kthreads away for power control purposes.

Add a mechanism which waits until all tasks which should leave the CPU
after the CPU active flag is cleared have moved to a different online CPU.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 kernel/sched/core.c  |   38 +++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |    4 ++++
 2 files changed, 41 insertions(+), 1 deletion(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2588,6 +2588,20 @@ void sched_ttwu_pending(void *arg)
        rq_unlock_irqrestore(rq, &rf);
 }
 
+/*
+ * Invoked from a CPU's hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent.
+ */
+static void balance_hotplug_wait(void)
+{
+       struct rq *rq = this_rq();
+
+       rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+                          TASK_UNINTERRUPTIBLE);
+}
+
 void send_call_function_single_ipi(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -6898,8 +6912,21 @@ static bool balance_push(struct rq *rq)
         * Both the cpu-hotplug and stop task are in this case and are
         * required to complete the hotplug process.
         */
-       if (is_per_cpu_kthread(push_task))
+       if (is_per_cpu_kthread(push_task)) {
+               /*
+                * If this is the idle task on the outgoing CPU try to wake
+                * up the hotplug control thread which might wait for the
+                * last task to vanish. The rcuwait_active() check is
+                * accurate here because the waiter is pinned on this CPU
+                * and obviously can't be running in parallel.
+                */
+               if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+                       raw_spin_unlock(&rq->lock);
+                       rcuwait_wake_up(&rq->hotplug_wait);
+                       raw_spin_lock(&rq->lock);
+               }
                return false;
+       }
 
        get_task_struct(push_task);
        /*
@@ -6939,6 +6966,8 @@ static inline bool balance_push(struct r
        return false;
 }
 
+static inline void balance_hotplug_wait(void) { }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -7093,6 +7122,10 @@ int sched_cpu_deactivate(unsigned int cp
                return ret;
        }
        sched_domains_numa_masks_clear(cpu);
+
+       /* Wait for all non per CPU kernel threads to vanish. */
+       balance_hotplug_wait();
+
        return 0;
 }
 
@@ -7333,6 +7366,9 @@ void __init sched_init(void)
 
                rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+               rcuwait_init(&rq->hotplug_wait);
+#endif
 #endif /* CONFIG_SMP */
                hrtick_rq_init(rq);
                atomic_set(&rq->nr_iowait, 0);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1004,6 +1004,10 @@ struct rq {
 
        /* This is used to determine avg_idle's max value */
        u64                     max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+       struct rcuwait          hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING


Reply via email to