On 7/10/2025 4:27 PM, Li Chen wrote:
>       /*
>        * .. and append 'j' levels of NUMA goodness.
>        */
>       for (j = 1; j < nr_levels; i++, j++) {
> -             tl[i] = (struct sched_domain_topology_level){
> -                     .mask = sd_numa_mask,
> -                     .sd_flags = cpu_numa_flags,
> -                     .flags = SDTL_OVERLAP,
> -                     .numa_level = j,
> -                     SD_INIT_NAME(NUMA)
> -             };
> +             tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
> +             tl[i].numa_level = j;
> +             tl[i].flags = SDTL_OVERLAP;

Tangential discussion: I was looking at this and was wondering why we
need a "tl->flags" when there is already an sd_flags() function and we
could simply add SD_OVERLAP to sd_numa_flags().

I think "tl->flags" was needed when the idea of overlap domains was
added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain
spans") when it depended on "FORCE_SD_OVERLAP" sched_feat() which
allowed toggling this off but that was done away with in commit
af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") so perhaps we
can get rid of it now?

Relying on SD_NUMA should be enough currently. Peter, Valentin, what do
you think of something like below?

(Build and boot tested on top of this series on tip:sched/core)

diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index b04a5d04dee9..42839cfa2778 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -153,14 +153,6 @@ SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
  */
 SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS)
 
-/*
- * sched_groups of this level overlap
- *
- * SHARED_PARENT: Set for all NUMA levels above NODE.
- * NEEDS_GROUPS: Overlaps can only exist with more than one group.
- */
-SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
-
 /*
  * Cross-node balancing
  *
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 0d5daaa277b7..5263746b63e8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -175,8 +175,6 @@ bool cpus_share_resources(int this_cpu, int that_cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 typedef int (*sched_domain_flags_f)(void);
 
-#define SDTL_OVERLAP   0x01
-
 struct sd_data {
        struct sched_domain *__percpu *sd;
        struct sched_domain_shared *__percpu *sds;
@@ -187,7 +185,6 @@ struct sd_data {
 struct sched_domain_topology_level {
        sched_domain_mask_f mask;
        sched_domain_flags_f sd_flags;
-       int                 flags;
        int                 numa_level;
        struct sd_data      data;
        char                *name;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20a845697c1d..b9b4bbbf0af6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9926,9 +9926,9 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
        min_capacity = ULONG_MAX;
        max_capacity = 0;
 
-       if (child->flags & SD_OVERLAP) {
+       if (child->flags & SD_NUMA) {
                /*
-                * SD_OVERLAP domains cannot assume that child groups
+                * SD_NUMA domains cannot assume that child groups
                 * span the current group.
                 */
 
@@ -9941,7 +9941,7 @@ void update_group_capacity(struct sched_domain *sd, int 
cpu)
                }
        } else  {
                /*
-                * !SD_OVERLAP domains can assume that child groups
+                * !SD_NUMA domains can assume that child groups
                 * span the current group.
                 */
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d01f5a49f2e7..977e133bb8a4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -89,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, 
int cpu, int level,
                        break;
                }
 
-               if (!(sd->flags & SD_OVERLAP) &&
+               if (!(sd->flags & SD_NUMA) &&
                    cpumask_intersects(groupmask, sched_group_span(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: repeated CPUs\n");
@@ -102,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, 
int cpu, int level,
                                group->sgc->id,
                                cpumask_pr_args(sched_group_span(group)));
 
-               if ((sd->flags & SD_OVERLAP) &&
+               if ((sd->flags & SD_NUMA) &&
                    !cpumask_equal(group_balance_mask(group), 
sched_group_span(group))) {
                        printk(KERN_CONT " mask=%*pbl",
                                cpumask_pr_args(group_balance_mask(group)));
@@ -1344,7 +1344,7 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, 
int new_prio)
                 * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
                 * which is shared by all the overlapping groups.
                 */
-               WARN_ON_ONCE(sd->flags & SD_OVERLAP);
+               WARN_ON_ONCE(sd->flags & SD_NUMA);
 
                sg = sd->groups;
                if (cpu != sg->asym_prefer_cpu) {
@@ -2016,7 +2016,6 @@ void sched_init_numa(int offline_node)
        for (j = 1; j < nr_levels; i++, j++) {
                tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
                tl[i].numa_level = j;
-               tl[i].flags = SDTL_OVERLAP;
        }
 
        sched_domain_topology_saved = sched_domain_topology;
@@ -2327,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
                        if (sdd->sd) {
                                sd = *per_cpu_ptr(sdd->sd, j);
-                               if (sd && (sd->flags & SD_OVERLAP))
+                               if (sd && (sd->flags & SD_NUMA))
                                        free_sched_groups(sd->groups, 0);
                                kfree(*per_cpu_ptr(sdd->sd, j));
                        }
@@ -2393,9 +2392,13 @@ static bool topology_span_sane(const struct cpumask 
*cpu_map)
        id_seen = sched_domains_tmpmask2;
 
        for_each_sd_topology(tl) {
+               int tl_common_flags = 0;
+
+               if (tl->sd_flags)
+                       tl_common_flags = (*tl->sd_flags)();
 
                /* NUMA levels are allowed to overlap */
-               if (tl->flags & SDTL_OVERLAP)
+               if (tl_common_flags & SD_NUMA)
                        continue;
 
                cpumask_clear(covered);
@@ -2466,8 +2469,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct 
sched_domain_attr *att
 
                        if (tl == sched_domain_topology)
                                *per_cpu_ptr(d.sd, i) = sd;
-                       if (tl->flags & SDTL_OVERLAP)
-                               sd->flags |= SD_OVERLAP;
                        if (cpumask_equal(cpu_map, sched_domain_span(sd)))
                                break;
                }
@@ -2480,7 +2481,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct 
sched_domain_attr *att
        for_each_cpu(i, cpu_map) {
                for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
                        sd->span_weight = cpumask_weight(sched_domain_span(sd));
-                       if (sd->flags & SD_OVERLAP) {
+                       if (sd->flags & SD_NUMA) {
                                if (build_overlap_sched_groups(sd, i))
                                        goto error;
                        } else {
---

We can also keep SD_OVERLAP and only remove SDTL_OVERLAP and tl->flags
if that is preferred, or keep both if you foresee future !NUMA use cases
for overlapping domains.

>       }
>  
>       sched_domain_topology_saved = sched_domain_topology;

-- 
Thanks and Regards,
Prateek


Reply via email to