On Tue, Apr 25, 2017 at 12:56:23PM -0300, Lauro Venancio wrote:

> > Another thing I've been thinking about; I think we can do away with the
> > kzalloc() in build_group_from_child_sched_domain() and use the sdd->sg
> > storage.
> I considered this too. I decided not to change this because I was not
> sure whether the kzalloc() was there for performance reasons. Currently,
> all groups are allocated on the NUMA node where they are used.
> If we use the sdd->sg storage, we may end up with groups allocated on
> one NUMA node being used on another node.

Right.. I cannot remember :/

/me once again kicks himself for not writing more comments
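
To make the trade-off concrete; the two variants side by side (both
bits lifted from the patch below, illustration only):

	/* today: each group is allocated on the node of the CPU
	 * that uses it: */
	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
			GFP_KERNEL, cpu_to_node(cpu));

	/* with sdd->sg reuse: take the group pre-allocated for a CPU
	 * in the child's span; that storage can sit on a different
	 * node than the CPU walking the group list: */
	group = *per_cpu_ptr(sdd->sg, cpu);
	atomic_inc(&group->ref);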

It does save a few lines.. and I suspect that if we do this, we could
actually get rid of sched_group_capacity completely, since it's now
always the same as the group (again), which should remove more lines
still.
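
Merging them would look something like this (sketch only; the real
structs have more members, and the balance mask would still need a
home somewhere):

	struct sched_group {
		struct sched_group *next;	/* circular list */
		atomic_t ref;

		unsigned int group_weight;
		unsigned long capacity;		/* was sgc->capacity */
		unsigned long min_capacity;	/* was sgc->min_capacity */
		unsigned long next_update;

		unsigned long cpumask[0];	/* group span */
	};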

But I'll shelve this patch for now.. we've got enough changes as is.

I still need to write a changelog for the new #2, which has become ugly
again, because it needs a second sched_domains_tmpmask.
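
(For reference, I'd expect the second mask to simply mirror the
existing one; the allocation site below is from memory, so treat it as
a sketch:)

	/* kernel/sched/topology.c */
	cpumask_var_t sched_domains_tmpmask;
	cpumask_var_t sched_domains_tmpmask2;	/* new scratch mask */

	/* allocated once at boot, next to the first one: */
	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);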

(compile tested only)
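
For context, the per-cpu sdd->sg storage that get_overlap_group() hands
out is set up in __sdt_alloc(), roughly like so (paraphrased from
memory, not part of this patch):

	for_each_cpu(j, cpu_map) {
		struct sched_group *sg;

		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
				GFP_KERNEL, cpu_to_node(j));
		if (!sg)
			return -ENOMEM;

		sg->next = sg;
		*per_cpu_ptr(sdd->sg, j) = sg;
	}

Note it allocates on the node of the CPU owning the per-cpu slot,
which is exactly what Lauro's locality point hinges on.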

---
 kernel/sched/topology.c |   76 ++++++++++++++++++------------------------------
 1 file changed, 29 insertions(+), 47 deletions(-)

--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -501,10 +501,8 @@ enum s_alloc {
  * balancing.
  */
 static void
-build_group_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
+build_group_mask(struct sd_data *sdd, struct cpumask *sg_span, struct cpumask *mask)
 {
-       const struct cpumask *sg_span = sched_group_cpus(sg);
-       struct sd_data *sdd = sd->private;
        struct sched_domain *sibling;
        int i;
 
@@ -542,49 +540,34 @@ int group_balance_cpu(struct sched_group
 }
 
 static struct sched_group *
-build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
+get_overlap_group(struct sd_data *sdd, int cpu)
 {
-       struct sched_group *sg;
-       struct cpumask *sg_span;
+       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+       struct sched_domain *child = sd->child;
+       struct sched_group *group;
+       struct cpumask *mask = sched_domains_tmpmask2;
 
-       sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
-                       GFP_KERNEL, cpu_to_node(cpu));
+       /*
+        * An overlapping domain must have non-overlapping children;
+        * this runs before degenerate code throws them out.
+        */
+       BUG_ON(!sd->child);
 
-       if (!sg)
-               return NULL;
+       build_group_mask(sdd, sched_domain_span(child), mask);
+       cpu = cpumask_first_and(sched_domain_span(child), mask);
 
-       sg_span = sched_group_cpus(sg);
-       if (sd->child)
-               cpumask_copy(sg_span, sched_domain_span(sd->child));
-       else
-               cpumask_copy(sg_span, sched_domain_span(sd));
+       BUG_ON(cpu >= nr_cpu_ids);
 
-       return sg;
-}
+       group = *per_cpu_ptr(sdd->sg, cpu);
+       group->sgc = *per_cpu_ptr(sdd->sgc, cpu);
 
-static void init_overlap_sched_group(struct sched_domain *sd,
-                                    struct sched_group *sg)
-{
-       struct cpumask *mask = sched_domains_tmpmask2;
-       struct sd_data *sdd = sd->private;
-       struct cpumask *sg_span;
-       int cpu;
+       atomic_inc(&group->ref);
+       atomic_inc(&group->sgc->ref);
 
-       build_group_mask(sd, sg, mask);
-       cpu = cpumask_first_and(sched_group_cpus(sg), mask);
+       cpumask_copy(sched_group_cpus(group), sched_domain_span(child));
+       cpumask_copy(sched_group_mask(group), mask);
 
-       sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
-       if (atomic_inc_return(&sg->sgc->ref) == 1)
-               cpumask_copy(sched_group_mask(sg), mask);
-
-       /*
-        * Initialize sgc->capacity such that even if we mess up the
-        * domains and no possible iteration will get us here, we won't
-        * die on a /0 trap.
-        */
-       sg_span = sched_group_cpus(sg);
-       sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-       sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       return group;
 }
 
 static int
@@ -620,14 +603,18 @@ build_overlap_sched_groups(struct sched_
                if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
                        continue;
 
-               sg = build_group_from_child_sched_domain(sibling, cpu);
-               if (!sg)
-                       goto fail;
+               sg = get_overlap_group(sdd, i);
 
                sg_span = sched_group_cpus(sg);
                cpumask_or(covered, covered, sg_span);
 
-               init_overlap_sched_group(sd, sg);
+               /*
+                * Initialize sgc->capacity such that even if we mess up the
+                * domains and no possible iteration will get us here, we won't
+                * die on a /0 trap.
+                */
+               sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+               sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 
                if (!first)
                        first = sg;
@@ -639,11 +626,6 @@ build_overlap_sched_groups(struct sched_
        sd->groups = first;
 
        return 0;
-
-fail:
-       free_sched_groups(first, 0);
-
-       return -ENOMEM;
 }
 
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
