max_idle_balance_cost whenever newidle balance is attempted

Preeti Murthy Sun, 27 Apr 2014 01:33:25 -0700

Hi Jason, Peter,

The below patch looks good to me except for one point.


In idle_balance() the below code snippet does not look right:

- if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
- /*
- * We are going idle. next_balance may be set based on
- * a busy processor. So reset next_balance.
- */
+out:
+ /* Move the next balance forward */
+ if (time_after(this_rq->next_balance, next_balance))
  this_rq->next_balance = next_balance;
- }

By not checking this_rq->next_balance against jiffies,
we might end up not updating this parameter when it
has expired.

So shouldn't it be:

if (time_after(jiffies, this_rq->next_balance) ||
           time_after(this_rq->next_balance, next_balance))
    this_rq->next_balance = next_balance;


Besides this:
Reviewed-by: Preeti U Murthy <[email protected]>

Regards
Preeti U Murthy


On Sat, Apr 26, 2014 at 1:24 AM, Jason Low <[email protected]> wrote:
> Signed-off-by: Jason Low <[email protected]>
> ---
>  kernel/sched/fair.c |   81 
> ++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 51 insertions(+), 30 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 43232b8..09c546c 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6645,27 +6645,59 @@ out:
>         return ld_moved;
>  }
>
> +static inline unsigned long get_sd_balance_interval(struct sched_domain *sd, 
> int busy)
> +{
> +       unsigned long interval = sd->balance_interval;
> +
> +       if (busy)
> +               interval *= sd->busy_factor;
> +
> +       /* scale ms to jiffies */
> +       interval = msecs_to_jiffies(interval);
> +       interval = clamp(interval, 1UL, max_load_balance_interval);
> +
> +       return interval;
> +}
> +
> +static inline void
> +update_next_balance(struct sched_domain *sd, int busy, unsigned long 
> *next_balance)
> +{
> +       unsigned long interval, next;
> +
> +       interval = get_sd_balance_interval(sd, busy);
> +       next = sd->last_balance + interval;
> +
> +       if (time_after(*next_balance, next))
> +               *next_balance = next;
> +}
> +
>  /*
>   * idle_balance is called by schedule() if this_cpu is about to become
>   * idle. Attempts to pull tasks from other CPUs.
>   */
>  static int idle_balance(struct rq *this_rq)
>  {
> +       unsigned long next_balance = jiffies + HZ;
> +       int this_cpu = this_rq->cpu;
>         struct sched_domain *sd;
>         int pulled_task = 0;
> -       unsigned long next_balance = jiffies + HZ;
>         u64 curr_cost = 0;
> -       int this_cpu = this_rq->cpu;
>
>         idle_enter_fair(this_rq);
> +
>         /*
>          * We must set idle_stamp _before_ calling idle_balance(), such that 
> we
>          * measure the duration of idle_balance() as idle time.
>          */
>         this_rq->idle_stamp = rq_clock(this_rq);
>
> -       if (this_rq->avg_idle < sysctl_sched_migration_cost)
> +       if (this_rq->avg_idle < sysctl_sched_migration_cost) {
> +               rcu_read_lock();
> +               sd = rcu_dereference_check_sched_domain(this_rq->sd);
> +               update_next_balance(sd, 0, &next_balance);
> +               rcu_read_unlock();
>                 goto out;
> +       }
>
>         /*
>          * Drop the rq->lock, but keep IRQ/preempt disabled.
> @@ -6675,15 +6707,16 @@ static int idle_balance(struct rq *this_rq)
>         update_blocked_averages(this_cpu);
>         rcu_read_lock();
>         for_each_domain(this_cpu, sd) {
> -               unsigned long interval;
>                 int continue_balancing = 1;
>                 u64 t0, domain_cost;
>
>                 if (!(sd->flags & SD_LOAD_BALANCE))
>                         continue;
>
> -               if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
> +               if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
> +                       update_next_balance(sd, 0, &next_balance);
>                         break;
> +               }
>
>                 if (sd->flags & SD_BALANCE_NEWIDLE) {
>                         t0 = sched_clock_cpu(this_cpu);
> @@ -6700,9 +6733,7 @@ static int idle_balance(struct rq *this_rq)
>                         curr_cost += domain_cost;
>                 }
>
> -               interval = msecs_to_jiffies(sd->balance_interval);
> -               if (time_after(next_balance, sd->last_balance + interval))
> -                       next_balance = sd->last_balance + interval;
> +               update_next_balance(sd, 0, &next_balance);
>                 if (pulled_task)
>                         break;
>         }
> @@ -6710,27 +6741,22 @@ static int idle_balance(struct rq *this_rq)
>
>         raw_spin_lock(&this_rq->lock);
>
> +       if (curr_cost > this_rq->max_idle_balance_cost)
> +               this_rq->max_idle_balance_cost = curr_cost;
> +
>         /*
> -        * While browsing the domains, we released the rq lock.
> -        * A task could have be enqueued in the meantime
> +        * While browsing the domains, we released the rq lock, a task could
> +        * have be enqueued in the meantime. Since we're not going idle,
> +        * pretend we pulled a task.
>          */
> -       if (this_rq->cfs.h_nr_running && !pulled_task) {
> +       if (this_rq->cfs.h_nr_running && !pulled_task)
>                 pulled_task = 1;
> -               goto out;
> -       }
>
> -       if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
> -               /*
> -                * We are going idle. next_balance may be set based on
> -                * a busy processor. So reset next_balance.
> -                */
> +out:
> +       /* Move the next balance forward */
> +       if (time_after(this_rq->next_balance, next_balance))
>                 this_rq->next_balance = next_balance;
> -       }
> -
> -       if (curr_cost > this_rq->max_idle_balance_cost)
> -               this_rq->max_idle_balance_cost = curr_cost;
>
> -out:
>         /* Is there a task of a high priority class? */
>         if (this_rq->nr_running != this_rq->cfs.h_nr_running)
>                 pulled_task = -1;
> @@ -7013,13 +7039,7 @@ static void rebalance_domains(struct rq *rq, enum 
> cpu_idle_type idle)
>                         break;
>                 }
>
> -               interval = sd->balance_interval;
> -               if (idle != CPU_IDLE)
> -                       interval *= sd->busy_factor;
> -
> -               /* scale ms to jiffies */
> -               interval = msecs_to_jiffies(interval);
> -               interval = clamp(interval, 1UL, max_load_balance_interval);
> +               interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
>
>                 need_serialize = sd->flags & SD_SERIALIZE;
>
> @@ -7038,6 +7058,7 @@ static void rebalance_domains(struct rq *rq, enum 
> cpu_idle_type idle)
>                                 idle = idle_cpu(cpu) ? CPU_IDLE : 
> CPU_NOT_IDLE;
>                         }
>                         sd->last_balance = jiffies;
> +                       interval = get_sd_balance_interval(sd, idle != 
> CPU_IDLE);
>                 }
>                 if (need_serialize)
>                         spin_unlock(&balancing);
> --
> 1.7.1
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] sched, balancing: Update rq->max_idle_balance_cost whenever newidle balance is attempted

Reply via email to