From: luca abeni <[email protected]>

Currently, the scheduler tries to find a proper placement for
SCHED_DEADLINE tasks only when they are pushed off a core or when
they wake up. Hence, a SCHED_DEADLINE task that never blocks (and
therefore never wakes up) is never migrated to an appropriate CPU
core, but keeps executing on its original core.

This commit addresses the issue by trying to migrate a SCHED_DEADLINE
task (searching for an appropriate CPU core) the first time it is
throttled.

Signed-off-by: luca abeni <[email protected]>
---
 include/linux/sched.h   |  1 +
 kernel/sched/deadline.c | 53 ++++++++++++++++++++++++++++++++++++-----
 kernel/sched/sched.h    |  2 ++
 3 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 863f70843875..5e322c8a94e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -560,6 +560,7 @@ struct sched_dl_entity {
        unsigned int                    dl_yielded        : 1;
        unsigned int                    dl_non_contending : 1;
        unsigned int                    dl_overrun        : 1;
+       unsigned int                    dl_adjust         : 1;
 
        /*
         * Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3436f3d8fa8f..db471889196b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -515,6 +515,7 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
        return dl_task(prev);
 }
 
+static DEFINE_PER_CPU(struct callback_head, dl_migrate_head);
 static DEFINE_PER_CPU(struct callback_head, dl_push_head);
 static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
 
@@ -1149,6 +1150,32 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
        return (delta * u_act) >> BW_SHIFT;
 }
 
+#ifdef CONFIG_SMP
+static int find_later_rq(struct task_struct *task);
+
+static void migrate_dl_task(struct rq *rq)
+{
+       struct task_struct *t = rq->migrating_task;
+       struct sched_dl_entity *dl_se = &t->dl;
+       int cpu = find_later_rq(t);
+
+       if ((cpu != -1) && (cpu != rq->cpu)) {
+               struct rq *later_rq;
+
+               later_rq = cpu_rq(cpu);
+
+               double_lock_balance(rq, later_rq);
+               sub_running_bw(&t->dl, &rq->dl);
+               sub_rq_bw(&t->dl, &rq->dl);
+               set_task_cpu(t, later_rq->cpu);
+               add_rq_bw(&t->dl, &later_rq->dl);
+               add_running_bw(&t->dl, &later_rq->dl);
+               double_unlock_balance(rq, later_rq);
+       }
+       rq->migrating_task = NULL;
+       dl_se->dl_adjust = 0;
+}
+#endif
 /*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
@@ -1223,8 +1250,17 @@ static void update_curr_dl(struct rq *rq)
                        dl_se->dl_overrun = 1;
 
                __dequeue_task_dl(rq, curr, 0);
-               if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+               if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) {
                        enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
+#ifdef CONFIG_SMP
+               } else if (dl_se->dl_adjust) {
+                       if (rq->migrating_task == NULL) {
+                               queue_balance_callback(rq, &per_cpu(dl_migrate_head, rq->cpu), migrate_dl_task);
+                               rq->migrating_task = current;
+                       } else
+                               printk_deferred("Throttled task before migrating the previous one???\n");
+#endif
+               }
 
                if (!is_leftmost(curr, &rq->dl))
                        resched_curr(rq);
@@ -1573,13 +1609,12 @@ static void yield_task_dl(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
-static int find_later_rq(struct task_struct *task);
-
 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        struct task_struct *curr;
        struct rq *rq;
+       bool het;
 
        if (sd_flag != SD_BALANCE_WAKE)
                goto out;
@@ -1591,6 +1626,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 
        rcu_read_lock();
        curr = READ_ONCE(rq->curr); /* unlocked access */
+       het = static_branch_unlikely(&sched_asym_cpucapacity);
 
        /*
         * If we are dealing with a -deadline task, we must
@@ -1604,15 +1640,17 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
        if ((unlikely(dl_task(curr)) &&
            (curr->nr_cpus_allowed < 2 ||
             !dl_entity_preempt(&p->dl, &curr->dl)) &&
-           (p->nr_cpus_allowed > 1)) ||
-           static_branch_unlikely(&sched_asym_cpucapacity)) {
+           (p->nr_cpus_allowed > 1)) || het) {
                int target = find_later_rq(p);
 
                if (target != -1 &&
                                (dl_time_before(p->dl.deadline,
                                        cpu_rq(target)->dl.earliest_dl.curr) ||
-                               (cpu_rq(target)->dl.dl_nr_running == 0)))
+                               (cpu_rq(target)->dl.dl_nr_running == 0))) {
+                       if (het && (target != cpu))
+                               p->dl.dl_adjust = 1;
                        cpu = target;
+               }
        }
        rcu_read_unlock();
 
@@ -2369,6 +2407,9 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
                else
                        resched_curr(rq);
        }
+
+       if (static_branch_unlikely(&sched_asym_cpucapacity))
+               p->dl.dl_adjust = 1;
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e5f9fd3aee80..1a8f75338ac2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -963,6 +963,8 @@ struct rq {
 
        /* This is used to determine avg_idle's max value */
        u64                     max_idle_balance_cost;
+
+       struct task_struct      *migrating_task;
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-- 
2.20.1

Reply via email to