select_idle_cpu() accounts for the average search cost for the purposes of conducting a limited proportional search if SIS_PROP is enabled. The issue is that select_idle_cpu() does not account for the cost if a candidate is found, and select_idle_smt() is ignored.
This patch moves the accounting of avg_cost to cover the cpu/smt search costs. select_idle_core() costs could be accounted for but it has its own throttling mechanism by tracking whether idle cores are expected to exist. This patch is a bisection hazard because SIS_PROP and how it balances avg_cost vs avg_idle was probably guided by the fact that avg_cost was not always accounted for. Signed-off-by: Mel Gorman <mgor...@techsingularity.net> --- kernel/sched/fair.c | 82 +++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1d8f5c4b4936..185fc6e28f8e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6006,6 +6006,29 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return new_cpu; } +static int sis_search_depth(struct sched_domain *sd, struct sched_domain *this_sd) +{ + u64 avg_cost, avg_idle, span_avg; + int nr = INT_MAX; + + if (sched_feat(SIS_PROP)) { + /* + * Due to large variance we need a large fuzz factor; hackbench in + * particularly is sensitive here. + */ + avg_idle = this_rq()->avg_idle / 512; + avg_cost = this_sd->avg_scan_cost + 1; + + span_avg = sd->span_weight * avg_idle; + if (span_avg > 4*avg_cost) + nr = div_u64(span_avg, avg_cost); + else + nr = 4; + } + + return nr; +} + #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); @@ -6151,35 +6174,11 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd * comparing the average scan cost (tracked in sd->avg_scan_cost) against the * average idle time for this rq (as found in rq->avg_idle). 
*/ -static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) +static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, + int target, int nr) { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); - struct sched_domain *this_sd; - u64 avg_cost, avg_idle; - u64 time; - int this = smp_processor_id(); - int cpu, nr = INT_MAX; - - this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); - if (!this_sd) - return -1; - - /* - * Due to large variance we need a large fuzz factor; hackbench in - * particularly is sensitive here. - */ - avg_idle = this_rq()->avg_idle / 512; - avg_cost = this_sd->avg_scan_cost + 1; - - if (sched_feat(SIS_PROP)) { - u64 span_avg = sd->span_weight * avg_idle; - if (span_avg > 4*avg_cost) - nr = div_u64(span_avg, avg_cost); - else - nr = 4; - } - - time = cpu_clock(this); + int cpu; cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); __cpumask_clear_cpu(target, cpus); @@ -6192,9 +6191,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t break; } - time = cpu_clock(this) - time; - update_avg(&this_sd->avg_scan_cost, time); - return cpu; } @@ -6245,9 +6241,10 @@ static inline bool asym_fits_capacity(int task_util, int cpu) */ static int select_idle_sibling(struct task_struct *p, int prev, int target) { - struct sched_domain *sd; + struct sched_domain *sd, *this_sd; unsigned long task_util; - int i, recent_used_cpu; + int i, recent_used_cpu, depth; + u64 time; schedstat_inc(this_rq()->sis_search); @@ -6337,21 +6334,34 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (!sd) return target; + this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); + if (!this_sd) + return target; + + depth = sis_search_depth(sd, this_sd); + schedstat_inc(this_rq()->sis_domain_search); i = select_idle_core(p, sd, target); if ((unsigned)i < nr_cpumask_bits) return i; - i = select_idle_cpu(p, sd, target); + time = cpu_clock(smp_processor_id()); + i = 
select_idle_cpu(p, sd, target, depth); if ((unsigned)i < nr_cpumask_bits) - return i; + goto acct_cost; i = select_idle_smt(p, sd, target); if ((unsigned)i < nr_cpumask_bits) - return i; + goto acct_cost; schedstat_inc(this_rq()->sis_failed); - return target; + i = target; + +acct_cost: + time = cpu_clock(smp_processor_id()) - time; + update_avg(&this_sd->avg_scan_cost, time); + + return i; } /** -- 2.26.2