From: Dietmar Eggemann <[email protected]>

This patch incorporates struct sched_domain_topology_info as a member
(info) into struct sched_domain_topology_level. It updates
sched_init_numa() to reflect the change that conventional (SMT, MC,
BOOK, CPU) level initialization now relies on the
sched_domain_topology_info array and no longer on default_topology[].
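
For illustration, a sketch of the resulting types (the definition of
struct sched_domain_topology_info itself comes from an earlier patch in
this series; the field names below simply match their use in the hunks
that follow):

	struct sched_domain_topology_info {
		sched_domain_mask_f mask;  /* e.g. cpu_smt_mask, cpu_cpu_mask */
		int                 flags; /* topology SD_* flags fed to sd_init() */
	};

	struct sched_domain_topology_level {
		struct sched_domain_topology_info info; /* replaces .init/.mask */
		int                 flags;              /* SDTL_OVERLAP */
		int                 numa_level;
		struct sd_data      data;
	};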

Moreover, a counterpart function sched_init_conv() is introduced to
handle the allocation of the topology level array on a !CONFIG_NUMA
system.
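
As a worked example (assuming CONFIG_SCHED_SMT and CONFIG_SCHED_MC are
set but CONFIG_SCHED_BOOK and CONFIG_NUMA are not),
default_topology_info[] holds three entries, so sched_init_conv()
allocates four levels:

	/*
	 * { cpu_smt_mask }, { cpu_coregroup_mask }, { cpu_cpu_mask }
	 * => sched_domain_topology_info_size == 3, kzalloc() of 3 + 1
	 * elements. Element [3] stays zero-filled, so its info.mask is
	 * NULL and terminates the iteration:
	 */
	for (tl = sched_domain_topology; tl->info.mask; tl++)
		/* visit the SMT, MC and CPU levels, then stop */;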

The patch also deletes the default topology array default_topology[]
and the SD_INIT_FUNC() macro, both of which are no longer used. The
function sd_local_flags() is deleted as well; its functionality is
folded directly into the NUMA-specific branch of the new sd_init().
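
The flag semantics are preserved: sd_init() now sets SD_BALANCE_EXEC,
SD_BALANCE_FORK and SD_WAKE_AFFINE unconditionally, and its NUMA branch
strips them again for distant levels, matching what sd_local_flags()
used to compute:

	/* Old: sd_local_flags(level) contributed these flags only when
	 * sched_domains_numa_distance[level] <= RECLAIM_DISTANCE. */

	/* New, in sd_init()'s SD_NUMA branch: */
	if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE)
		sd->flags &= ~(SD_BALANCE_EXEC |
			       SD_BALANCE_FORK |
			       SD_WAKE_AFFINE);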

Signed-off-by: Dietmar Eggemann <[email protected]>
---
 kernel/sched/core.c |  247 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 135 insertions(+), 112 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 523bb43756d6..90aa7c3d3a00 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5006,15 +5006,10 @@ enum s_alloc {
        sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-
 #define SDTL_OVERLAP   0x01
 
 struct sched_domain_topology_level {
-       sched_domain_init_f init;
-       sched_domain_mask_f mask;
+       struct sched_domain_topology_info info;
        int                 flags;
        int                 numa_level;
        struct sd_data      data;
@@ -5254,28 +5249,6 @@ int __weak arch_sd_sibling_asym_packing(void)
 # define SD_INIT_NAME(sd, type)                do { } while (0)
 #endif
 
-#define SD_INIT_FUNC(type)                                             \
-static noinline struct sched_domain *                                  \
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu)        \
-{                                                                      \
-       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);       \
-       *sd = SD_##type##_INIT;                                         \
-       SD_INIT_NAME(sd, type);                                         \
-       sd->private = &tl->data;                                        \
-       return sd;                                                      \
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5364,23 +5337,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 }
 
 /*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-       { sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-       { sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-       { sd_init_BOOK, cpu_book_mask, },
-#endif
-       { sd_init_CPU, cpu_cpu_mask, },
-       { NULL, },
-};
-
-/*
  * Topology info list, bottom-up.
  */
 static struct sched_domain_topology_info default_topology_info[] = {
@@ -5394,10 +5350,9 @@ static struct sched_domain_topology_info default_topology_info[] = {
        { cpu_book_mask, },
 #endif
        { cpu_cpu_mask, },
-       { NULL, },
 };
 
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
+static struct sched_domain_topology_level *sched_domain_topology;
 static struct sched_domain_topology_info *sched_domain_topology_info =
                default_topology_info;
 static unsigned int sched_domain_topology_info_size =
@@ -5411,7 +5366,7 @@ set_sd_topology_info(struct sched_domain_topology_info *ti, unsigned int s)
 }
 
 #define for_each_sd_topology(tl)                       \
-       for (tl = sched_domain_topology; tl->init; tl++)
+       for (tl = sched_domain_topology; tl->info.mask; tl++)
 
 #ifdef CONFIG_NUMA
 
@@ -5420,61 +5375,6 @@ static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
 
-static inline int sd_local_flags(int level)
-{
-       if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-               return 0;
-
-       return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
-
-static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
-{
-       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-       int level = tl->numa_level;
-       int sd_weight = cpumask_weight(
-                       sched_domains_numa_masks[level][cpu_to_node(cpu)]);
-
-       *sd = (struct sched_domain){
-               .min_interval           = sd_weight,
-               .max_interval           = 2*sd_weight,
-               .busy_factor            = 32,
-               .imbalance_pct          = 125,
-               .cache_nice_tries       = 2,
-               .busy_idx               = 3,
-               .idle_idx               = 2,
-               .newidle_idx            = 0,
-               .wake_idx               = 0,
-               .forkexec_idx           = 0,
-
-               .flags                  = 1*SD_LOAD_BALANCE
-                                       | 1*SD_BALANCE_NEWIDLE
-                                       | 0*SD_BALANCE_EXEC
-                                       | 0*SD_BALANCE_FORK
-                                       | 0*SD_BALANCE_WAKE
-                                       | 0*SD_WAKE_AFFINE
-                                       | 0*SD_SHARE_CPUPOWER
-                                       | 0*SD_SHARE_PKG_RESOURCES
-                                       | 1*SD_SERIALIZE
-                                       | 0*SD_PREFER_SIBLING
-                                       | 1*SD_NUMA
-                                       | sd_local_flags(level)
-                                       ,
-               .last_balance           = jiffies,
-               .balance_interval       = sd_weight,
-       };
-       SD_INIT_NAME(sd, NUMA);
-       sd->private = &tl->data;
-
-       /*
-        * Ugly hack to pass state to sd_numa_mask()...
-        */
-       sched_domains_curr_level = tl->numa_level;
-
-       return sd;
-}
-
 static const struct cpumask *sd_numa_mask(int cpu)
 {
        return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -5520,6 +5420,7 @@ static void sched_init_numa(void)
 {
        int next_distance, curr_distance = node_distance(0, 0);
        struct sched_domain_topology_level *tl;
+       struct sched_domain_topology_info *ti = sched_domain_topology_info;
        int level = 0;
        int i, j, k;
 
@@ -5618,24 +5519,29 @@ static void sched_init_numa(void)
                }
        }
 
-       tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
-                       sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+       /*
+        * An extra empty struct sched_domain_topology_level element at the end
+        * of the array is needed to let for_each_sd_topology() work correctly.
+        */
+       tl = kzalloc((sched_domain_topology_info_size + level + 1) *
+                       sizeof(struct sched_domain_topology_level),
+                       GFP_KERNEL);
        if (!tl)
                return;
 
        /*
-        * Copy the default topology bits..
+        * Copy the topology info bits..
         */
-       for (i = 0; default_topology[i].init; i++)
-               tl[i] = default_topology[i];
+       for (i = 0; i < sched_domain_topology_info_size; i++)
+               tl[i].info = ti[i];
 
        /*
         * .. and append 'j' levels of NUMA goodness.
         */
        for (j = 0; j < level; i++, j++) {
                tl[i] = (struct sched_domain_topology_level){
-                       .init = sd_numa_init,
-                       .mask = sd_numa_mask,
+                       .info.mask = sd_numa_mask,
+                       .info.flags = SD_NUMA,
                        .flags = SDTL_OVERLAP,
                        .numa_level = j,
                };
@@ -5646,6 +5552,10 @@ static void sched_init_numa(void)
        sched_domains_numa_levels = level;
 }
 
+static void sched_init_conv(void)
+{
+}
+
 static void sched_domains_numa_masks_set(int cpu)
 {
        int i, j;
@@ -5698,6 +5608,31 @@ static inline void sched_init_numa(void)
 {
 }
 
+static void sched_init_conv(void)
+{
+       struct sched_domain_topology_level *tl;
+       struct sched_domain_topology_info *ti = sched_domain_topology_info;
+       int i;
+
+       /*
+        * An extra empty struct sched_domain_topology_level element at the end
+        * of the array is needed to let for_each_sd_topology() work correctly.
+        */
+       tl = kzalloc((sched_domain_topology_info_size + 1) *
+               sizeof(struct sched_domain_topology_level),
+               GFP_KERNEL);
+       if (!tl)
+               return;
+
+       /*
+        * Copy the topology info bits..
+        */
+       for (i = 0; i < sched_domain_topology_info_size; i++)
+               tl[i].info = ti[i];
+
+       sched_domain_topology = tl;
+}
+
 static int sched_domains_numa_masks_update(struct notifier_block *nfb,
                                           unsigned long action,
                                           void *hcpu)
@@ -5706,6 +5641,93 @@ static int sched_domains_numa_masks_update(struct notifier_block *nfb,
 }
 #endif /* CONFIG_NUMA */
 
+static struct sched_domain *
+sd_init(struct sched_domain_topology_level *tl, int cpu)
+{
+       struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
+       int sd_weight;
+
+#ifdef CONFIG_NUMA
+       /*
+        * Ugly hack to pass state to sd_numa_mask()...
+        */
+       sched_domains_curr_level = tl->numa_level;
+#endif
+
+       sd_weight = cpumask_weight(tl->info.mask(cpu));
+
+       if (WARN_ONCE(tl->info.flags & ~TOPOLOGY_SD_FLAGS,
+                       "wrong flags in topology info\n"))
+               tl->info.flags &= ~TOPOLOGY_SD_FLAGS;
+
+       *sd = (struct sched_domain){
+               .min_interval           = sd_weight,
+               .max_interval           = 2*sd_weight,
+               .busy_factor            = 64,
+               .imbalance_pct          = 125,
+
+               .flags                  = 1*SD_LOAD_BALANCE
+                                       | 1*SD_BALANCE_NEWIDLE
+                                       | 1*SD_BALANCE_EXEC
+                                       | 1*SD_BALANCE_FORK
+                                       | 1*SD_WAKE_AFFINE
+                                       | tl->info.flags
+                                       ,
+
+               .last_balance           = jiffies,
+               .balance_interval       = sd_weight,
+       };
+
+       /*
+        * Convert topological properties into behaviour.
+        */
+
+       if (sd->flags & SD_SHARE_CPUPOWER) {
+               sd->imbalance_pct = 110;
+               sd->smt_gain = 1178; /* ~15% */
+
+               /*
+                * Call SMT specific arch topology function.
+                * This goes away once the powerpc arch uses
+                * the new interface for scheduler domain
+                * setup.
+                */
+               sd->flags |= arch_sd_sibling_asym_packing();
+
+               SD_INIT_NAME(sd, SMT);
+       } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+               sd->cache_nice_tries = 1;
+               sd->busy_idx = 2;
+
+               SD_INIT_NAME(sd, MC);
+#ifdef CONFIG_NUMA
+       } else if (sd->flags & SD_NUMA) {
+               sd->busy_factor = 32;
+               sd->cache_nice_tries = 2;
+               sd->busy_idx = 3;
+               sd->idle_idx = 2;
+               sd->flags |= SD_SERIALIZE;
+               if (sched_domains_numa_distance[tl->numa_level]
+                               > RECLAIM_DISTANCE) {
+                       sd->flags &= ~(SD_BALANCE_EXEC |
+                                      SD_BALANCE_FORK |
+                                      SD_WAKE_AFFINE);
+               }
+#endif
+       } else {
+               sd->cache_nice_tries = 1;
+               sd->busy_idx = 2;
+               sd->idle_idx = 1;
+               sd->flags |= SD_PREFER_SIBLING;
+
+               SD_INIT_NAME(sd, CPU);
+       }
+
+       sd->private = &tl->data;
+
+       return sd;
+}
+
 static int __sdt_alloc(const struct cpumask *cpu_map)
 {
        struct sched_domain_topology_level *tl;
@@ -5795,11 +5817,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                struct sched_domain *child, int cpu)
 {
-       struct sched_domain *sd = tl->init(tl, cpu);
+       struct sched_domain *sd = sd_init(tl, cpu);
        if (!sd)
                return child;
 
-       cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+       cpumask_and(sched_domain_span(sd), cpu_map, tl->info.mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                sched_domain_level_max = max(sched_domain_level_max, sd->level);
@@ -6138,6 +6160,7 @@ void __init sched_init_smp(void)
        alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
        alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
+       sched_init_conv();
        sched_init_numa();
 
        /*
-- 
1.7.9.5

