On Mon, May 06, 2019 at 06:48:33AM +0200, Luca Abeni wrote:
> @@ -1223,8 +1250,17 @@ static void update_curr_dl(struct rq *rq)
>                       dl_se->dl_overrun = 1;
>  
>               __dequeue_task_dl(rq, curr, 0);
> -             if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
> +             if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) {
>                       enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
> +#ifdef CONFIG_SMP
> +             } else if (dl_se->dl_adjust) {
> +                     if (rq->migrating_task == NULL) {
> +                             queue_balance_callback(rq, 
> &per_cpu(dl_migrate_head, rq->cpu), migrate_dl_task);

I'm not entirely sure about this one.

That is, we only do those callbacks from:

  schedule_tail()
  __schedule()
  rt_mutex_setprio()
  __sched_setscheduler()

and the above looks like it can happen outside of those.

The pattern in those sites is:

        rq_lock();
        ... do crap that leads to queue_balance_callback()
        rq_unlock()
        if (rq->balance_callback) {
                raw_spin_lock_irqsave(rq->lock, flags);
                ... do callbacks
                raw_spin_unlock_irqrestore(rq->lock, flags);
        }

So I suppose can catch abuse of this API by doing something like the
below; can you validate?

---

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aaca0e743776..89e615f1eae6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1134,6 +1134,14 @@ static inline void rq_pin_lock(struct rq *rq, struct 
rq_flags *rf)
        rf->cookie = lockdep_pin_lock(&rq->lock);
 
 #ifdef CONFIG_SCHED_DEBUG
+#ifdef CONFIG_SMP
+       /*
+        * There should not be pending callbacks at the start of rq_lock();
+        * all sites that handle them flush them at the end.
+        */
+       WARN_ON_ONCE(rq->balance_callback);
+#endif
+
        rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
        rf->clock_update_flags = 0;
 #endif

Reply via email to