On 2021/1/15 18:08, Mel Gorman wrote:
> From: Peter Zijlstra (Intel) <pet...@infradead.org>
>
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
>
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Signed-off-by: Mel Gorman <mgor...@techsingularity.net>
> ---
>  kernel/sched/fair.c | 97 +++++++++++++++++++++++++++------------------
>  1 file changed, 59 insertions(+), 38 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 12e08da90024..6c0f841e9e75 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
>  	return new_cpu;
>  }
>
> +static inline int __select_idle_cpu(struct task_struct *p, int core, struct cpumask *cpus)

Sorry if I missed anything, but why are p and cpus needed here?
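If they really are unused (the body quoted below only looks at core), a
slimmer helper along these lines would avoid carrying them around. Just a
sketch, assuming nothing later in the series starts using the two arguments:

	/* Return @core if it is idle (or running only SCHED_IDLE tasks), -1 otherwise. */
	static inline int __select_idle_cpu(int core)
	{
		if (available_idle_cpu(core) || sched_idle_cpu(core))
			return core;

		return -1;
	}

The callers would then just pass the candidate CPU number.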
> +{
> +	if (available_idle_cpu(core) || sched_idle_cpu(core))
> +		return core;
> +
> +	return -1;
> +}
> +
>  #ifdef CONFIG_SCHED_SMT
>  DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>  EXPORT_SYMBOL_GPL(sched_smt_present);
> @@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
>   * there are no idle cores left in the system; tracked through
>   * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
>   */
> -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
>  {
> -	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> -	int core, cpu;
> +	bool idle = true;
> +	int cpu;
>
>  	if (!static_branch_likely(&sched_smt_present))
> -		return -1;
> -
> -	if (!test_idle_cores(target, false))
> -		return -1;
> -
> -	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +		return __select_idle_cpu(p, core, cpus);
>
> -	for_each_cpu_wrap(core, cpus, target) {
> -		bool idle = true;
> -
> -		for_each_cpu(cpu, cpu_smt_mask(core)) {
> -			if (!available_idle_cpu(cpu)) {
> -				idle = false;
> -				break;
> +	for_each_cpu(cpu, cpu_smt_mask(core)) {
> +		if (!available_idle_cpu(cpu)) {
> +			idle = false;
> +			if (*idle_cpu == -1) {
> +				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
> +					*idle_cpu = cpu;
> +					break;
> +				}
> +				continue;
>  			}
> +			break;
>  		}
> -
> -		if (idle)
> -			return core;
> -
> -		cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> +		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> +			*idle_cpu = cpu;
>  	}
>
> -	/*
> -	 * Failed to find an idle core; stop looking for one.
> -	 */
> -	set_idle_cores(target, 0);
> +	if (idle)
> +		return core;
>
> +	cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
>  	return -1;
>  }
>
> @@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
>
>  #define sched_smt_weight 1
>
> -static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static inline void set_idle_cores(int cpu, int val)
>  {
> -	return -1;
> +}
> +
> +static inline bool test_idle_cores(int cpu, bool def)
> +{
> +	return def;
> +}
> +
> +static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> +{
> +	return __select_idle_cpu(p, core, cpus);
>  }
>
>  #endif /* CONFIG_SCHED_SMT */
> @@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
>  static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
>  {
>  	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> +	int i, cpu, idle_cpu = -1, nr = INT_MAX;
> +	bool smt = test_idle_cores(target, false);
> +	int this = smp_processor_id();
>  	struct sched_domain *this_sd;
>  	u64 time;
> -	int this = smp_processor_id();
> -	int cpu, nr = INT_MAX;
>
>  	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
>  	if (!this_sd)
> @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
>  	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
>
> -	if (sched_feat(SIS_PROP)) {
> +	if (sched_feat(SIS_PROP) && !smt) {

Is it possible that the system does have an idle core, but I still don't want
to scan the entire LLC domain? With this change the SIS_PROP depth limit no
longer applies once an idle core is believed to exist.
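Just to illustrate the concern, one (untested) possibility would be to keep
the depth limit even when an idle core is believed to exist, and only skip
the avg_scan_cost feedback for the core scan. Whether the existing
avg_cost/avg_idle estimate is a sensible budget when nr ends up counting
whole cores is a separate question:

	if (sched_feat(SIS_PROP)) {
		/* existing avg_cost/avg_idle depth estimate, unchanged */
		time = cpu_clock(this);
	}

	for_each_cpu_wrap(cpu, cpus, target) {
		if (!--nr)
			return -1;
		/* core/CPU scan exactly as in the patch */
	}

	/* Only feed the scan cost back for the flat CPU scan. */
	if (sched_feat(SIS_PROP) && !smt) {
		time = cpu_clock(this) - time;
		update_avg(&this_sd->avg_scan_cost, time);
	}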
>  		u64 avg_cost, avg_idle, span_avg;
>
>  		/*
> @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>  	for_each_cpu_wrap(cpu, cpus, target) {
>  		if (!--nr)
>  			return -1;

It looks like nr only makes sense when smt == false now; can it be moved into
the else branch below?

> -		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> -			break;
> +		if (smt) {
> +			i = select_idle_core(p, cpu, cpus, &idle_cpu);
> +			if ((unsigned int)i < nr_cpumask_bits)
> +				return i;

What if the last idle core is selected here? Should we call
set_idle_cores(this, false) before returning?

> +
> +		} else {
> +			i = __select_idle_cpu(p, cpu, cpus);
> +			if ((unsigned int)i < nr_cpumask_bits) {
> +				idle_cpu = i;
> +				break;
> +			}
> +		}
>  	}
>
> -	if (sched_feat(SIS_PROP)) {
> +	if (smt)
> +		set_idle_cores(this, false);
> +
> +	if (sched_feat(SIS_PROP) && !smt) {
>  		time = cpu_clock(this) - time;
>  		update_avg(&this_sd->avg_scan_cost, time);
>  	}
>
> -	return cpu;
> +	return idle_cpu;
>  }
>
>  /*
> @@ -6297,10 +6322,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>  	if (!sd)
>  		return target;
>
> -	i = select_idle_core(p, sd, target);
> -	if ((unsigned)i < nr_cpumask_bits)
> -		return i;
> -
>  	i = select_idle_cpu(p, sd, target);
>  	if ((unsigned)i < nr_cpumask_bits)
>  		return i;
>
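If the intent is that the core scan stays unlimited, then to illustrate the
nr point above, the loop could be restructured roughly as follows (untested;
it only moves the decrement and does not address the set_idle_cores()
question):

	for_each_cpu_wrap(cpu, cpus, target) {
		if (smt) {
			i = select_idle_core(p, cpu, cpus, &idle_cpu);
			if ((unsigned int)i < nr_cpumask_bits)
				return i;
		} else {
			/* Only the flat CPU scan consumes the SIS_PROP budget. */
			if (!--nr)
				return -1;

			i = __select_idle_cpu(p, cpu, cpus);
			if ((unsigned int)i < nr_cpumask_bits) {
				idle_cpu = i;
				break;
			}
		}
	}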