On Mon, May 06, 2019 at 06:48:33AM +0200, Luca Abeni wrote:

> @@ -1223,8 +1250,17 @@ static void update_curr_dl(struct rq *rq)
>  			dl_se->dl_overrun = 1;
>
>  		__dequeue_task_dl(rq, curr, 0);
> -		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
> +		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) {
>  			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
> +#ifdef CONFIG_SMP
> +		} else if (dl_se->dl_adjust) {
> +			if (rq->migrating_task == NULL) {
> +				queue_balance_callback(rq,
> +					&per_cpu(dl_migrate_head, rq->cpu), migrate_dl_task);
I'm not entirely sure about this one. That is, we only do those
callbacks from:

  schedule_tail()
  __schedule()
  rt_mutex_setprio()
  __sched_setscheduler()

and the above looks like it can happen outside of those.

The pattern in those sites is:

  rq_lock();
  ... do crap that leads to queue_balance_callback()
  rq_unlock();
  if (rq->balance_callback) {
          raw_spin_lock_irqsave(rq->lock, flags);
          ... do callbacks
          raw_spin_unlock_irqrestore(rq->lock, flags);
  }

So I suppose we can catch abuse of this API by doing something like the
below; can you validate?

---

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aaca0e743776..89e615f1eae6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1134,6 +1134,14 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 	rf->cookie = lockdep_pin_lock(&rq->lock);
 
 #ifdef CONFIG_SCHED_DEBUG
+#ifdef CONFIG_SMP
+	/*
+	 * There should not be pending callbacks at the start of rq_lock();
+	 * all sites that handle them flush them at the end.
+	 */
+	WARN_ON_ONCE(rq->balance_callback);
+#endif
+
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
 	rf->clock_update_flags = 0;
 #endif
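
For completeness, the two halves of the queue_balance_callback()
contract look something like the sketch below (paraphrased from memory
from kernel/sched/sched.h and kernel/sched/core.c of that era; exact
code differs per tree). Every site that can queue also flushes right
after dropping rq->lock, which is why rq->balance_callback should be
NULL again by the next rq_lock(), and why the WARN_ON_ONCE() above
holds:

  /*
   * Queue side (sketch of queue_balance_callback() in kernel/sched/sched.h):
   * must be called with rq->lock held; the callback is pushed onto the
   * rq->balance_callback list to be run later, after the lock is dropped.
   */
  static inline void
  queue_balance_callback(struct rq *rq,
                         struct callback_head *head,
                         void (*func)(struct rq *rq))
  {
          lockdep_assert_held(&rq->lock);

          if (unlikely(head->next))       /* already queued */
                  return;

          head->func = (void (*)(struct callback_head *))func;
          head->next = rq->balance_callback;
          rq->balance_callback = head;
  }

  /*
   * Flush side (sketch of __balance_callback() in kernel/sched/core.c):
   * called by the four sites above right after they drop rq->lock; it
   * re-takes the lock, detaches the list and invokes each callback.
   */
  static void __balance_callback(struct rq *rq)
  {
          struct callback_head *head, *next;
          void (*func)(struct rq *rq);
          unsigned long flags;

          raw_spin_lock_irqsave(&rq->lock, flags);
          head = rq->balance_callback;
          rq->balance_callback = NULL;
          while (head) {
                  next = head->next;
                  head->next = NULL;
                  func = (void (*)(struct rq *))head->func;

                  func(rq);
                  head = next;
          }
          raw_spin_unlock_irqrestore(&rq->lock, flags);
  }

If update_curr_dl() queues from the tick path, nothing on that path
runs the flush, so the callback can linger on rq->balance_callback
across a lock drop and trip the warning. That is the abuse the debug
patch is meant to catch.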