The current sg->min_capacity tracks the lowest per-cpu compute capacity
available in the sched_group when rt/irq pressure is taken into account.
Minimum capacity isn't the ideal metric for tracking if a sched_group
needs offloading to another sched_group for some scenarios, e.g. a
sched_group with multiple cpus if only one is under heavy pressure.
Tracking maximum capacity isn't perfect either but a better choice for
some situations as it indicates that the sched_group definitely compute
capacity constrained either due to rt/irq pressure on all cpus or
asymmetric cpu capacities (e.g. big.LITTLE).

cc: Ingo Molnar <[email protected]>
cc: Peter Zijlstra <[email protected]>

Signed-off-by: Morten Rasmussen <[email protected]>
---
 kernel/sched/fair.c     | 24 ++++++++++++++++++++----
 kernel/sched/sched.h    |  1 +
 kernel/sched/topology.c |  2 ++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ed7eb2ac068f..6af3354e9e26 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7929,13 +7929,14 @@ static void update_cpu_capacity(struct sched_domain 
*sd, int cpu)
        cpu_rq(cpu)->cpu_capacity = capacity;
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = capacity;
+       sdg->sgc->max_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity, min_capacity;
+       unsigned long capacity, min_capacity, max_capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -7949,6 +7950,7 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
 
        capacity = 0;
        min_capacity = ULONG_MAX;
+       max_capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -7979,6 +7981,7 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
                        }
 
                        min_capacity = min(capacity, min_capacity);
+                       max_capacity = max(capacity, max_capacity);
                }
        } else  {
                /*
@@ -7992,12 +7995,14 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
 
                        capacity += sgc->capacity;
                        min_capacity = min(sgc->min_capacity, min_capacity);
+                       max_capacity = max(sgc->max_capacity, max_capacity);
                        group = group->next;
                } while (group != child->groups);
        }
 
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = min_capacity;
+       sdg->sgc->max_capacity = max_capacity;
 }
 
 /*
@@ -8093,16 +8098,27 @@ group_is_overloaded(struct lb_env *env, struct 
sg_lb_stats *sgs)
 }
 
 /*
- * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
  * per-CPU capacity than sched_group ref.
  */
 static inline bool
-group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
        return sg->sgc->min_capacity * capacity_margin <
                                                ref->sgc->min_capacity * 1024;
 }
 
+/*
+ * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-CPU capacity_orig than sched_group ref.
+ */
+static inline bool
+group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+       return sg->sgc->max_capacity * capacity_margin <
+                                               ref->sgc->max_capacity * 1024;
+}
+
 static inline enum
 group_type group_classify(struct sched_group *group,
                          struct sg_lb_stats *sgs)
@@ -8248,7 +8264,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
         * power/energy consequences are not considered.
         */
        if (sgs->sum_nr_running <= sgs->group_weight &&
-           group_smaller_cpu_capacity(sds->local, sg))
+           group_smaller_min_cpu_capacity(sds->local, sg))
                return false;
 
 asym_packing:
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 273d07dedc90..5ed67122cf59 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1165,6 +1165,7 @@ struct sched_group_capacity {
         */
        unsigned long           capacity;
        unsigned long           min_capacity;           /* Min per-CPU capacity 
in group */
+       unsigned long           max_capacity;           /* Max per-CPU capacity 
in group */
        unsigned long           next_update;
        int                     imbalance;              /* XXX unrelated to 
capacity but shared group state */
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index edc87e35fc75..f32bf3a998b1 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -708,6 +708,7 @@ static void init_overlap_sched_group(struct sched_domain 
*sd,
        sg_span = sched_group_span(sg);
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
@@ -867,6 +868,7 @@ static struct sched_group *get_group(int cpu, struct 
sd_data *sdd)
 
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * 
cpumask_weight(sched_group_span(sg));
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 
        return sg;
 }
-- 
2.7.4

Reply via email to