Hi Alex,

On Sat, 30 Mar 2013 22:35:00 +0800, Alex Shi wrote:
> Sleeping tasks have no utilization; when they are woken up in a burst,
> the zero utilization throws the scheduler out of balance, as in the
> aim7 benchmark.
>
> rq->avg_idle is 'used to accommodate bursty loads in a dirt simple
> dirt cheap manner' -- Mike Galbraith.
>
> With this cheap and smart bursty indicator, we can find the wake up
> burst, and use nr_running as instant utilization in this scenario.
>
> For other scenarios, we still use the precise CPU utilization to
> judge if a domain is eligible for power scheduling.
>
> Thanks for Mike Galbraith's idea!
>
> Signed-off-by: Alex Shi <alex....@intel.com>
> ---
>  kernel/sched/fair.c | 33 ++++++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 83b2c39..ae07190 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3371,12 +3371,19 @@ static unsigned int max_rq_util(int cpu)
>   * Try to collect the task running number and capacity of the group.
>   */
>  static void get_sg_power_stats(struct sched_group *group,
> -     struct sched_domain *sd, struct sg_lb_stats *sgs)
> +     struct sched_domain *sd, struct sg_lb_stats *sgs, int burst)
>  {
>       int i;
>  
> -     for_each_cpu(i, sched_group_cpus(group))
> -             sgs->group_util += max_rq_util(i);
> +     for_each_cpu(i, sched_group_cpus(group)) {
> +             struct rq *rq = cpu_rq(i);
> +
> +             if (burst && rq->nr_running > 1)
> +                     /* use nr_running as instant utilization */
> +                     sgs->group_util += rq->nr_running;

I guess multiplying rq->nr_running by FULL_UTIL here would remove the
need for special-casing the burst in is_sd_full().  Also, moving this
logic into max_rq_util() looks better IMHO.


> +             else
> +                     sgs->group_util += max_rq_util(i);
> +     }
>  
>       sgs->group_weight = group->group_weight;
>  }
> @@ -3390,6 +3397,8 @@ static int is_sd_full(struct sched_domain *sd,
>       struct sched_group *group;
>       struct sg_lb_stats sgs;
>       long sd_min_delta = LONG_MAX;
> +     int cpu = task_cpu(p);
> +     int burst = 0;
>       unsigned int putil;
>  
>       if (p->se.load.weight == p->se.avg.load_avg_contrib)
> @@ -3399,15 +3408,21 @@ static int is_sd_full(struct sched_domain *sd,
>               putil = (u64)(p->se.avg.runnable_avg_sum << SCHED_POWER_SHIFT)
>                               / (p->se.avg.runnable_avg_period + 1);
>  
> +     if (cpu_rq(cpu)->avg_idle < sysctl_sched_burst_threshold)
> +             burst = 1;

Sorry, I don't understand this.

Given that sysctl_sched_burst_threshold is twice
sysctl_sched_migration_cost, which is the max value of rq->avg_idle,
avg_idle will almost always be less than the threshold, right?

So how does it find the burst case?  I thought a burst is the case
where a cpu has been idle for a while and then wakes a number of tasks
at once.  If so, shouldn't it check whether avg_idle is *longer* than a
certain threshold?  What am I missing?

Thanks,
Namhyung


> +
>       /* Try to collect the domain's utilization */
>       group = sd->groups;
>       do {
>               long g_delta;
>  
>               memset(&sgs, 0, sizeof(sgs));
> -             get_sg_power_stats(group, sd, &sgs);
> +             get_sg_power_stats(group, sd, &sgs, burst);
>  
> -             g_delta = sgs.group_weight * FULL_UTIL - sgs.group_util;
> +             if (burst)
> +                     g_delta = sgs.group_weight - sgs.group_util;
> +             else
> +                     g_delta = sgs.group_weight * FULL_UTIL - sgs.group_util;
>  
>               if (g_delta > 0 && g_delta < sd_min_delta) {
>                       sd_min_delta = g_delta;
> @@ -3417,8 +3432,12 @@ static int is_sd_full(struct sched_domain *sd,
>               sds->sd_util += sgs.group_util;
>       } while  (group = group->next, group != sd->groups);
>  
> -     if (sds->sd_util + putil < sd->span_weight * FULL_UTIL)
> -             return 0;
> +     if (burst) {
> +             if (sds->sd_util < sd->span_weight)
> +                     return 0;
> +     } else
> +             if (sds->sd_util + putil < sd->span_weight * FULL_UTIL)
> +                     return 0;
>  
>       /* can not hold one more task in this domain */
>       return 1;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to