Something is wrong. In find_busiest_group(), we check whether src has
the higher load; however, in task_numa_find_cpu(), we check whether dst
will have the higher load after balancing. It does not make sense to
check src here.

This can produce a wrong imbalance value. For example, if
dst_running = env->dst_stats.nr_running + 1 comes to 3 or above, and
src_running = env->src_stats.nr_running - 1 comes to 1, the current
code treats the imbalance as 0 since src_running is smaller than 2.
That is inconsistent with the load balancer.
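
To illustrate, here is a minimal user-space sketch (not kernel code;
the helper below only models the imbalance_min = 2 threshold of
adjust_numa_imbalance(), and the numbers are the ones from the example
above):

    #include <stdio.h>

    /* simplified model of adjust_numa_imbalance(): hide a small
     * imbalance when roughly two or fewer tasks are involved */
    static long adjust_numa_imbalance(int imbalance, int nr_running)
    {
            const int imbalance_min = 2;

            if (nr_running <= imbalance_min)
                    return 0;
            return imbalance;
    }

    int main(void)
    {
            int src_running = 2 - 1; /* env->src_stats.nr_running - 1 */
            int dst_running = 2 + 1; /* env->dst_stats.nr_running + 1 */
            int imbalance = dst_running - src_running; /* 2 */

            /* before this patch: src_running <= 2, imbalance hidden */
            printf("check src: %ld\n",
                   adjust_numa_imbalance(imbalance, src_running)); /* 0 */
            /* after this patch: dst_running > 2, imbalance kept */
            printf("check dst: %ld\n",
                   adjust_numa_imbalance(imbalance, dst_running)); /* 2 */
            return 0;
    }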

Fixes: fb86f5b211 ("sched/numa: Use similar logic to the load balancer for moving between domains with spare capacity")
Cc: Mel Gorman <mgor...@techsingularity.net>
Cc: Peter Zijlstra <a.p.zijls...@chello.nl>
Cc: Vincent Guittot <vincent.guit...@linaro.org>
Cc: Juri Lelli <juri.le...@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggem...@arm.com>
Cc: Valentin Schneider <valentin.schnei...@arm.com>
Cc: Phil Auld <pa...@redhat.com>
Cc: Hillf Danton <hdan...@sina.com>
Cc: Ingo Molnar <mi...@kernel.org>
Signed-off-by: Barry Song <song.bao....@hisilicon.com>
---
 kernel/sched/fair.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1a68a05..90cfee7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1548,7 +1548,7 @@ struct task_numa_env {
 
 static unsigned long cpu_load(struct rq *rq);
 static unsigned long cpu_util(int cpu);
-static inline long adjust_numa_imbalance(int imbalance, int src_nr_running);
+static inline long adjust_numa_imbalance(int imbalance, int nr_running);
 
 static inline enum
 numa_type numa_classify(unsigned int imbalance_pct,
@@ -1925,7 +1925,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
                src_running = env->src_stats.nr_running - 1;
                dst_running = env->dst_stats.nr_running + 1;
                imbalance = max(0, dst_running - src_running);
-               imbalance = adjust_numa_imbalance(imbalance, src_running);
+               imbalance = adjust_numa_imbalance(imbalance, dst_running);
 
                /* Use idle CPU if there is no imbalance */
                if (!imbalance) {
@@ -8957,7 +8957,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
        }
 }
 
-static inline long adjust_numa_imbalance(int imbalance, int src_nr_running)
+static inline long adjust_numa_imbalance(int imbalance, int nr_running)
 {
        unsigned int imbalance_min;
 
@@ -8966,7 +8966,7 @@ static inline long adjust_numa_imbalance(int imbalance, int src_nr_running)
         * tasks that remain local when the source domain is almost idle.
         */
        imbalance_min = 2;
-       if (src_nr_running <= imbalance_min)
+       if (nr_running <= imbalance_min)
                return 0;
 
        return imbalance;
-- 
2.7.4