Hi Jason, Peter, The below patch looks good to me except for one point.
In idle_balance() the below code snippet does not look right: - if (pulled_task || time_after(jiffies, this_rq->next_balance)) { - /* - * We are going idle. next_balance may be set based on - * a busy processor. So reset next_balance. - */ +out: + /* Move the next balance forward */ + if (time_after(this_rq->next_balance, next_balance)) this_rq->next_balance = next_balance; - } By not checking this_rq->next_balance against jiffies, we might end up not updating this parameter when it has expired. So shouldn't it be: if (time_after(jiffies, this_rq->next_balance) || time_after(this_rq->next_balance, next_balance)) this_rq->next_balance = next_balance; Besides this: Reviewed-by: Preeti U Murthy <pre...@linux.vnet.ibm.com> Regards Preeti U Murthy On Sat, Apr 26, 2014 at 1:24 AM, Jason Low <jason.l...@hp.com> wrote: > Signed-off-by: Jason Low <jason.l...@hp.com> > --- > kernel/sched/fair.c | 81 > ++++++++++++++++++++++++++++++++------------------- > 1 files changed, 51 insertions(+), 30 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 43232b8..09c546c 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -6645,27 +6645,59 @@ out: > return ld_moved; > } > > +static inline unsigned long get_sd_balance_interval(struct sched_domain *sd, > int busy) > +{ > + unsigned long interval = sd->balance_interval; > + > + if (busy) > + interval *= sd->busy_factor; > + > + /* scale ms to jiffies */ > + interval = msecs_to_jiffies(interval); > + interval = clamp(interval, 1UL, max_load_balance_interval); > + > + return interval; > +} > + > +static inline void > +update_next_balance(struct sched_domain *sd, int busy, unsigned long > *next_balance) > +{ > + unsigned long interval, next; > + > + interval = get_sd_balance_interval(sd, busy); > + next = sd->last_balance + interval; > + > + if (time_after(*next_balance, next)) > + *next_balance = next; > +} > + > /* > * idle_balance is called by schedule() if this_cpu is about to become > * idle. Attempts to pull tasks from other CPUs. > */ > static int idle_balance(struct rq *this_rq) > { > + unsigned long next_balance = jiffies + HZ; > + int this_cpu = this_rq->cpu; > struct sched_domain *sd; > int pulled_task = 0; > - unsigned long next_balance = jiffies + HZ; > u64 curr_cost = 0; > - int this_cpu = this_rq->cpu; > > idle_enter_fair(this_rq); > + > /* > * We must set idle_stamp _before_ calling idle_balance(), such that > we > * measure the duration of idle_balance() as idle time. > */ > this_rq->idle_stamp = rq_clock(this_rq); > > - if (this_rq->avg_idle < sysctl_sched_migration_cost) > + if (this_rq->avg_idle < sysctl_sched_migration_cost) { > + rcu_read_lock(); > + sd = rcu_dereference_check_sched_domain(this_rq->sd); > + update_next_balance(sd, 0, &next_balance); > + rcu_read_unlock(); > goto out; > + } > > /* > * Drop the rq->lock, but keep IRQ/preempt disabled. > @@ -6675,15 +6707,16 @@ static int idle_balance(struct rq *this_rq) > update_blocked_averages(this_cpu); > rcu_read_lock(); > for_each_domain(this_cpu, sd) { > - unsigned long interval; > int continue_balancing = 1; > u64 t0, domain_cost; > > if (!(sd->flags & SD_LOAD_BALANCE)) > continue; > > - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) > + if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { > + update_next_balance(sd, 0, &next_balance); > break; > + } > > if (sd->flags & SD_BALANCE_NEWIDLE) { > t0 = sched_clock_cpu(this_cpu); > @@ -6700,9 +6733,7 @@ static int idle_balance(struct rq *this_rq) > curr_cost += domain_cost; > } > > - interval = msecs_to_jiffies(sd->balance_interval); > - if (time_after(next_balance, sd->last_balance + interval)) > - next_balance = sd->last_balance + interval; > + update_next_balance(sd, 0, &next_balance); > if (pulled_task) > break; > } > @@ -6710,27 +6741,22 @@ static int idle_balance(struct rq *this_rq) > > raw_spin_lock(&this_rq->lock); > > + if (curr_cost > this_rq->max_idle_balance_cost) > + this_rq->max_idle_balance_cost = curr_cost; > + > /* > - * While browsing the domains, we released the rq lock. > - * A task could have be enqueued in the meantime > + * While browsing the domains, we released the rq lock, a task could > + * have be enqueued in the meantime. Since we're not going idle, > + * pretend we pulled a task. > */ > - if (this_rq->cfs.h_nr_running && !pulled_task) { > + if (this_rq->cfs.h_nr_running && !pulled_task) > pulled_task = 1; > - goto out; > - } > > - if (pulled_task || time_after(jiffies, this_rq->next_balance)) { > - /* > - * We are going idle. next_balance may be set based on > - * a busy processor. So reset next_balance. > - */ > +out: > + /* Move the next balance forward */ > + if (time_after(this_rq->next_balance, next_balance)) > this_rq->next_balance = next_balance; > - } > - > - if (curr_cost > this_rq->max_idle_balance_cost) > - this_rq->max_idle_balance_cost = curr_cost; > > -out: > /* Is there a task of a high priority class? */ > if (this_rq->nr_running != this_rq->cfs.h_nr_running) > pulled_task = -1; > @@ -7013,13 +7039,7 @@ static void rebalance_domains(struct rq *rq, enum > cpu_idle_type idle) > break; > } > > - interval = sd->balance_interval; > - if (idle != CPU_IDLE) > - interval *= sd->busy_factor; > - > - /* scale ms to jiffies */ > - interval = msecs_to_jiffies(interval); > - interval = clamp(interval, 1UL, max_load_balance_interval); > + interval = get_sd_balance_interval(sd, idle != CPU_IDLE); > > need_serialize = sd->flags & SD_SERIALIZE; > > @@ -7038,6 +7058,7 @@ static void rebalance_domains(struct rq *rq, enum > cpu_idle_type idle) > idle = idle_cpu(cpu) ? CPU_IDLE : > CPU_NOT_IDLE; > } > sd->last_balance = jiffies; > + interval = get_sd_balance_interval(sd, idle != > CPU_IDLE); > } > if (need_serialize) > spin_unlock(&balancing); > -- > 1.7.1 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/