struct sched_group_capacity currently represents the compute capacity
sum of all cpus in the sched_group. Unless it is divided by the
group_weight to get the average capacity per cpu it hides differences in
cpu capacity for mixed capacity systems (e.g. high RT/IRQ utilization or
ARM big.LITTLE). But even the average may not be sufficient if the group
covers cpus of different capacities. Instead, by extending struct
sched_group_capacity to indicate max per-cpu capacity in the group a
suitable group for a given task utilization can easily be found such
that cpus with reduced capacity can be avoided for tasks with high
utilization (not implemented by this patch).

cc: Ingo Molnar <mi...@redhat.com>
cc: Peter Zijlstra <pet...@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmus...@arm.com>
---
 kernel/sched/core.c  |  3 ++-
 kernel/sched/fair.c  | 17 ++++++++++++-----
 kernel/sched/sched.h |  3 ++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d4059c..4dd5cd7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5599,7 +5599,7 @@ static int sched_domain_debug_one(struct sched_domain 
*sd, int cpu, int level,
                printk(KERN_CONT " %*pbl",
                       cpumask_pr_args(sched_group_cpus(group)));
                if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-                       printk(KERN_CONT " (cpu_capacity = %d)",
+                       printk(KERN_CONT " (cpu_capacity = %lu)",
                                group->sgc->capacity);
                }
 
@@ -6070,6 +6070,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int 
cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * 
cpumask_weight(sg_span);
+               sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 
                /*
                 * Make sure the first group of this domain contains the
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d4c5339..0dd2d9c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6658,13 +6658,14 @@ static void update_cpu_capacity(struct sched_domain 
*sd, int cpu)
 
        cpu_rq(cpu)->cpu_capacity = capacity;
        sdg->sgc->capacity = capacity;
+       sdg->sgc->max_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity;
+       unsigned long capacity, max_capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -6677,6 +6678,7 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
        }
 
        capacity = 0;
+       max_capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -6701,11 +6703,12 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
                         */
                        if (unlikely(!rq->sd)) {
                                capacity += capacity_of(cpu);
-                               continue;
+                       } else {
+                               sgc = rq->sd->groups->sgc;
+                               capacity += sgc->capacity;
                        }
 
-                       sgc = rq->sd->groups->sgc;
-                       capacity += sgc->capacity;
+                       max_capacity = max(capacity, max_capacity);
                }
        } else  {
                /*
@@ -6715,12 +6718,16 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
 
                group = child->groups;
                do {
-                       capacity += group->sgc->capacity;
+                       struct sched_group_capacity *sgc = group->sgc;
+
+                       capacity += sgc->capacity;
+                       max_capacity = max(sgc->max_capacity, max_capacity);
                        group = group->next;
                } while (group != child->groups);
        }
 
        sdg->sgc->capacity = capacity;
+       sdg->sgc->max_capacity = max_capacity;
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 72150c2..359f689 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -868,7 +868,8 @@ struct sched_group_capacity {
         * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
         * for a single CPU.
         */
-       unsigned int capacity;
+       unsigned long capacity;
+       unsigned long max_capacity; /* Max per-cpu capacity in group */
        unsigned long next_update;
        int imbalance; /* XXX unrelated to capacity but shared group state */
        /*
-- 
1.9.1

Reply via email to