From: Valentin Schneider <valentin.schnei...@arm.com>

Since commit

  1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug")

tasks are expected to move themselves out of a out-going CPU. For most
tasks this will be done automagically via BALANCE_PUSH, but percpu kthreads
will have to cooperate and move themselves away one way or another.

Currently, some percpu kthreads (workqueues being a notable exemple) do not
cooperate nicely and can end up on an out-going CPU at the time
sched_cpu_dying() is invoked.

Print the dying rq's tasks to shed some light on the stragglers.

Signed-off-by: Valentin Schneider <valentin.schnei...@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Link: 
https://lkml.kernel.org/r/20210113183141.11974-1-valentin.schnei...@arm.com
---
 kernel/sched/core.c |   24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7580,6 +7580,25 @@ static void calc_load_migrate(struct rq
                atomic_long_add(delta, &calc_load_tasks);
 }
 
+static void dump_rq_tasks(struct rq *rq, const char *loglvl)
+{
+       struct task_struct *g, *p;
+       int cpu = cpu_of(rq);
+
+       lockdep_assert_held(&rq->lock);
+
+       printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, 
rq->nr_running);
+       for_each_process_thread(g, p) {
+               if (task_cpu(p) != cpu)
+                       continue;
+
+               if (!task_on_rq_queued(p))
+                       continue;
+
+               printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm);
+       }
+}
+
 int sched_cpu_dying(unsigned int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -7589,7 +7608,10 @@ int sched_cpu_dying(unsigned int cpu)
        sched_tick_stop(cpu);
 
        rq_lock_irqsave(rq, &rf);
-       BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
+       if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) {
+               WARN(true, "Dying CPU not properly vacated!");
+               dump_rq_tasks(rq, KERN_WARNING);
+       }
        rq_unlock_irqrestore(rq, &rf);
 
        calc_load_migrate(rq);


Reply via email to