On Sat, Jul 13, 2013 at 12:47:15PM +0400, Vladimir Davydov wrote:

> ---
>  kernel/sched/fair.c  |   56 ++++++++++++++++++++++----------------------------
>  kernel/sched/sched.h |    7 +++----
>  2 files changed, 28 insertions(+), 35 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index f77f9c5..de90690 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4171,47 +4171,46 @@ static void update_blocked_averages(int cpu)
>  }
>  
>  /*
> + * Compute the hierarchical load factor for cfs_rq and all its ascendants.
>   * This needs to be done in a top-down fashion because the load of a child
>   * group is a fraction of its parents load.
>   */
> +static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
>  {
> +     struct rq *rq = rq_of(cfs_rq);
> +     struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
>       unsigned long load;
>  
> +     cfs_rq->h_load_next = NULL;
> +     for_each_sched_entity(se) {
> +             cfs_rq = cfs_rq_of(se);
> +             cfs_rq->h_load_next = se;
> +             if (cfs_rq->last_h_load_update == rq->clock)
> +                     break;
>       }
>  
> +     if (!se) {
> +             cfs_rq->h_load = rq->avg.load_avg_contrib;
> +             cfs_rq->last_h_load_update = rq->clock;
> +     }
>  
> +     while ((se = cfs_rq->h_load_next) != NULL) {
> +             load = cfs_rq->h_load;
> +             load = div64_ul(load * se->avg.load_avg_contrib,
> +                             cfs_rq->runnable_load_avg + 1);
> +             cfs_rq = group_cfs_rq(se);
> +             cfs_rq->h_load = load;
> +             cfs_rq->last_h_load_update = rq->clock;
> +     }
>  }
>  
>  static unsigned long task_h_load(struct task_struct *p)
>  {
>       struct cfs_rq *cfs_rq = task_cfs_rq(p);
>  
> +     if (cfs_rq->last_h_load_update != rq_of(cfs_rq)->clock)
> +             update_cfs_rq_h_load(cfs_rq);
> +
>       return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
>                       cfs_rq->runnable_load_avg + 1);
>  }

OK, fair enough. It does somewhat rely on us getting the single
rq->clock update thing right, but that should be ok.

But yeah, when you have a stupidly large number of cgroups, we quickly need
fewer h_load instances than there are cgroups.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to