Something is wrong. In find_busiest_group(), we are checking if src has higher load, however, in task_numa_find_cpu(), we are checking if dst will have higher load after balancing. It seems it is not sensible to check src. It maybe cause wrong imbalance value, for example, if dst_running = env->dst_stats.nr_running + 1 results in 3 or above, and src_running = env->src_stats.nr_running - 1 results in 1; The current code is thinking imbalance as 0 since src_running is smaller than 2. This is inconsistent with load balancer.
Fixes: fb86f5b211 ("sched/numa: Use similar logic to the load balancer for moving between domains with spare capacity") Cc: Mel Gorman <mgor...@techsingularity.net> Cc: Peter Zijlstra <a.p.zijls...@chello.nl> Cc: Vincent Guittot <vincent.guit...@linaro.org> Cc: Juri Lelli <juri.le...@redhat.com> Cc: Dietmar Eggemann <dietmar.eggem...@arm.com> Cc: Valentin Schneider <valentin.schnei...@arm.com> Cc: Phil Auld <pa...@redhat.com> Cc: Hillf Danton <hdan...@sina.com> Cc: Ingo Molnar <mi...@kernel.org> Signed-off-by: Barry Song <song.bao....@hisilicon.com> --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1a68a05..90cfee7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1548,7 +1548,7 @@ struct task_numa_env { static unsigned long cpu_load(struct rq *rq); static unsigned long cpu_util(int cpu); -static inline long adjust_numa_imbalance(int imbalance, int src_nr_running); +static inline long adjust_numa_imbalance(int imbalance, int nr_running); static inline enum numa_type numa_classify(unsigned int imbalance_pct, @@ -1925,7 +1925,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, src_running = env->src_stats.nr_running - 1; dst_running = env->dst_stats.nr_running + 1; imbalance = max(0, dst_running - src_running); - imbalance = adjust_numa_imbalance(imbalance, src_running); + imbalance = adjust_numa_imbalance(imbalance, dst_running); /* Use idle CPU if there is no imbalance */ if (!imbalance) { @@ -8957,7 +8957,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } } -static inline long adjust_numa_imbalance(int imbalance, int src_nr_running) +static inline long adjust_numa_imbalance(int imbalance, int nr_running) { unsigned int imbalance_min; @@ -8966,7 +8966,7 @@ static inline long adjust_numa_imbalance(int imbalance, int src_nr_running) * tasks that remain local when the source domain is almost idle. */ imbalance_min = 2; - if (src_nr_running <= imbalance_min) + if (nr_running <= imbalance_min) return 0; return imbalance; -- 2.7.4