In commit ac66f5477239 ("sched/numa: Introduce migrate_swap()")
select_task_rq() got a 'cpu' argument to enable overriding of prev_cpu
in special cases (NUMA task swapping). However, the
select_task_rq_fair() helper functions: wake_affine() and
select_idle_sibling(), still use task_cpu(p) directly to work out
prev_cpu which leads to inconsistencies.

This patch passes prev_cpu (potentially overridden by NUMA code) into
the helper functions to ensure prev_cpu is indeed the same cpu
everywhere in the wakeup path.

cc: Ingo Molnar <mi...@redhat.com>
cc: Peter Zijlstra <pet...@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmus...@arm.com>
---
 kernel/sched/fair.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e8..c49e25a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -656,7 +656,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct 
sched_entity *se)
 }
 
 #ifdef CONFIG_SMP
-static int select_idle_sibling(struct task_struct *p, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
 
 /*
@@ -1502,7 +1502,8 @@ static void task_numa_compare(struct task_numa_env *env,
         * Call select_idle_sibling to maybe find a better one.
         */
        if (!cur)
-               env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+               env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+                                                  env->dst_cpu);
 
 assign:
        assigned = true;
@@ -5013,18 +5014,18 @@ static int wake_wide(struct task_struct *p)
        return 1;
 }
 
-static int wake_affine(struct sched_domain *sd, struct task_struct *p, int 
sync)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+                      int prev_cpu, int sync)
 {
        s64 this_load, load;
        s64 this_eff_load, prev_eff_load;
-       int idx, this_cpu, prev_cpu;
+       int idx, this_cpu;
        struct task_group *tg;
        unsigned long weight;
        int balanced;
 
        idx       = sd->wake_idx;
        this_cpu  = smp_processor_id();
-       prev_cpu  = task_cpu(p);
        load      = source_load(prev_cpu, idx);
        this_load = target_load(this_cpu, idx);
 
@@ -5189,11 +5190,10 @@ find_idlest_cpu(struct sched_group *group, struct 
task_struct *p, int this_cpu)
 /*
  * Try and locate an idle CPU in the sched_domain.
  */
-static int select_idle_sibling(struct task_struct *p, int target)
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
        struct sched_domain *sd;
        struct sched_group *sg;
-       int i = task_cpu(p);
 
        if (idle_cpu(target))
                return target;
@@ -5201,8 +5201,8 @@ static int select_idle_sibling(struct task_struct *p, int 
target)
        /*
         * If the prevous cpu is cache affine and idle, don't be stupid.
         */
-       if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
-               return i;
+       if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+               return prev;
 
        /*
         * Otherwise, iterate the domains and find an eligible idle cpu.
@@ -5223,6 +5223,8 @@ static int select_idle_sibling(struct task_struct *p, int 
target)
        for_each_lower_domain(sd) {
                sg = sd->groups;
                do {
+                       int i;
+
                        if (!cpumask_intersects(sched_group_cpus(sg),
                                                tsk_cpus_allowed(p)))
                                goto next;
@@ -5331,13 +5333,13 @@ select_task_rq_fair(struct task_struct *p, int 
prev_cpu, int sd_flag, int wake_f
 
        if (affine_sd) {
                sd = NULL; /* Prefer wake_affine over balance flags */
-               if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+               if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, 
sync))
                        new_cpu = cpu;
        }
 
        if (!sd) {
                if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
-                       new_cpu = select_idle_sibling(p, new_cpu);
+                       new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
        } else while (sd) {
                struct sched_group *group;
-- 
1.9.1

Reply via email to