From: Dietmar Eggemann <[email protected]> This patch incorporates struct sched_domain_topology_info info into struct sched_domain_topology_level. It updates sched_init_numa() to reflect the change that conventional (SMT, MC, BOOK, CPU) level initialization relies on the topology_info[] array and not on the default_topology[] any more.
Moreover a counterpart function sched_init_conv() is introduced to handle the allocation of the topology array for a !CONFIG_NUMA system. The patch deletes the default topology array default_topology[] and the SD_INIT_FUNC() macro which are not used any more. The function sd_local_flags() is deleted too and the appropriate functionality is directly incorporated into the NUMA specific condition path in sd_init(). Signed-off-by: Dietmar Eggemann <[email protected]> --- kernel/sched/core.c | 247 ++++++++++++++++++++++++++++----------------------- 1 file changed, 135 insertions(+), 112 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 523bb43756d6..90aa7c3d3a00 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5006,15 +5006,10 @@ enum s_alloc { sa_none, }; -struct sched_domain_topology_level; - -typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); - #define SDTL_OVERLAP 0x01 struct sched_domain_topology_level { - sched_domain_init_f init; - sched_domain_mask_f mask; + struct sched_domain_topology_info info; int flags; int numa_level; struct sd_data data; @@ -5254,28 +5249,6 @@ int __weak arch_sd_sibling_asym_packing(void) # define SD_INIT_NAME(sd, type) do { } while (0) #endif -#define SD_INIT_FUNC(type) \ -static noinline struct sched_domain * \ -sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ -{ \ - struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ - *sd = SD_##type##_INIT; \ - SD_INIT_NAME(sd, type); \ - sd->private = &tl->data; \ - return sd; \ -} - -SD_INIT_FUNC(CPU) -#ifdef CONFIG_SCHED_SMT - SD_INIT_FUNC(SIBLING) -#endif -#ifdef CONFIG_SCHED_MC - SD_INIT_FUNC(MC) -#endif -#ifdef CONFIG_SCHED_BOOK - SD_INIT_FUNC(BOOK) -#endif - static int default_relax_domain_level = -1; int sched_domain_level_max; @@ -5364,23 +5337,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd) } /* - * Topology list, bottom-up. 
- */ -static struct sched_domain_topology_level default_topology[] = { -#ifdef CONFIG_SCHED_SMT - { sd_init_SIBLING, cpu_smt_mask, }, -#endif -#ifdef CONFIG_SCHED_MC - { sd_init_MC, cpu_coregroup_mask, }, -#endif -#ifdef CONFIG_SCHED_BOOK - { sd_init_BOOK, cpu_book_mask, }, -#endif - { sd_init_CPU, cpu_cpu_mask, }, - { NULL, }, -}; - -/* * Topology info list, bottom-up. */ static struct sched_domain_topology_info default_topology_info[] = { @@ -5394,10 +5350,9 @@ static struct sched_domain_topology_info default_topology_info[] = { { cpu_book_mask, }, #endif { cpu_cpu_mask, }, - { NULL, }, }; -static struct sched_domain_topology_level *sched_domain_topology = default_topology; +static struct sched_domain_topology_level *sched_domain_topology; static struct sched_domain_topology_info *sched_domain_topology_info = default_topology_info; static unsigned int sched_domain_topology_info_size = @@ -5411,7 +5366,7 @@ set_sd_topology_info(struct sched_domain_topology_info *ti, unsigned int s) } #define for_each_sd_topology(tl) \ - for (tl = sched_domain_topology; tl->init; tl++) + for (tl = sched_domain_topology; tl->info.mask; tl++) #ifdef CONFIG_NUMA @@ -5420,61 +5375,6 @@ static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; static int sched_domains_curr_level; -static inline int sd_local_flags(int level) -{ - if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE) - return 0; - - return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; -} - -static struct sched_domain * -sd_numa_init(struct sched_domain_topology_level *tl, int cpu) -{ - struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); - int level = tl->numa_level; - int sd_weight = cpumask_weight( - sched_domains_numa_masks[level][cpu_to_node(cpu)]); - - *sd = (struct sched_domain){ - .min_interval = sd_weight, - .max_interval = 2*sd_weight, - .busy_factor = 32, - .imbalance_pct = 125, - .cache_nice_tries = 2, - .busy_idx = 3, - .idle_idx = 2, - .newidle_idx = 0, - 
.wake_idx = 0, - .forkexec_idx = 0, - - .flags = 1*SD_LOAD_BALANCE - | 1*SD_BALANCE_NEWIDLE - | 0*SD_BALANCE_EXEC - | 0*SD_BALANCE_FORK - | 0*SD_BALANCE_WAKE - | 0*SD_WAKE_AFFINE - | 0*SD_SHARE_CPUPOWER - | 0*SD_SHARE_PKG_RESOURCES - | 1*SD_SERIALIZE - | 0*SD_PREFER_SIBLING - | 1*SD_NUMA - | sd_local_flags(level) - , - .last_balance = jiffies, - .balance_interval = sd_weight, - }; - SD_INIT_NAME(sd, NUMA); - sd->private = &tl->data; - - /* - * Ugly hack to pass state to sd_numa_mask()... - */ - sched_domains_curr_level = tl->numa_level; - - return sd; -} - static const struct cpumask *sd_numa_mask(int cpu) { return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; @@ -5520,6 +5420,7 @@ static void sched_init_numa(void) { int next_distance, curr_distance = node_distance(0, 0); struct sched_domain_topology_level *tl; + struct sched_domain_topology_info *ti = sched_domain_topology_info; int level = 0; int i, j, k; @@ -5618,24 +5519,29 @@ static void sched_init_numa(void) } } - tl = kzalloc((ARRAY_SIZE(default_topology) + level) * - sizeof(struct sched_domain_topology_level), GFP_KERNEL); + /* + * An extra empty struct sched_domain_topology_level element at the end + * of the array is needed to let for_each_sd_topology() work correctly. + */ + tl = kzalloc((sched_domain_topology_info_size + level + 1) * + sizeof(struct sched_domain_topology_level), + GFP_KERNEL); if (!tl) return; /* - * Copy the default topology bits.. + * Copy the topology info bits.. */ - for (i = 0; default_topology[i].init; i++) - tl[i] = default_topology[i]; + for (i = 0; i < sched_domain_topology_info_size; i++) + tl[i].info = ti[i]; /* * .. and append 'j' levels of NUMA goodness. 
*/ for (j = 0; j < level; i++, j++) { tl[i] = (struct sched_domain_topology_level){ - .init = sd_numa_init, - .mask = sd_numa_mask, + .info.mask = sd_numa_mask, + .info.flags = SD_NUMA, .flags = SDTL_OVERLAP, .numa_level = j, }; @@ -5646,6 +5552,10 @@ static void sched_init_numa(void) sched_domains_numa_levels = level; } +static void sched_init_conv(void) +{ +} + static void sched_domains_numa_masks_set(int cpu) { int i, j; @@ -5698,6 +5608,31 @@ static inline void sched_init_numa(void) { } +static void sched_init_conv(void) +{ + struct sched_domain_topology_level *tl; + struct sched_domain_topology_info *ti = sched_domain_topology_info; + int i; + + /* + * An extra empty struct sched_domain_topology_level element at the end + * of the array is needed to let for_each_sd_topology() work correctly. + */ + tl = kzalloc((sched_domain_topology_info_size + 1) * + sizeof(struct sched_domain_topology_level), + GFP_KERNEL); + if (!tl) + return; + + /* + * Copy the topology info bits.. + */ + for (i = 0; i < sched_domain_topology_info_size; i++) + tl[i].info = ti[i]; + + sched_domain_topology = tl; +} + static int sched_domains_numa_masks_update(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -5706,6 +5641,93 @@ static int sched_domains_numa_masks_update(struct notifier_block *nfb, } #endif /* CONFIG_NUMA */ +static struct sched_domain * +sd_init(struct sched_domain_topology_level *tl, int cpu) +{ + struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); + int sd_weight; + +#ifdef CONFIG_NUMA + /* + * Ugly hack to pass state to sd_numa_mask()... 
+ */ + sched_domains_curr_level = tl->numa_level; +#endif + + sd_weight = cpumask_weight(tl->info.mask(cpu)); + + if (WARN_ONCE(tl->info.flags & ~TOPOLOGY_SD_FLAGS, + "wrong flags in topology info\n")) + tl->info.flags &= ~TOPOLOGY_SD_FLAGS; + + *sd = (struct sched_domain){ + .min_interval = sd_weight, + .max_interval = 2*sd_weight, + .busy_factor = 64, + .imbalance_pct = 125, + + .flags = 1*SD_LOAD_BALANCE + | 1*SD_BALANCE_NEWIDLE + | 1*SD_BALANCE_EXEC + | 1*SD_BALANCE_FORK + | 1*SD_WAKE_AFFINE + | tl->info.flags + , + + .last_balance = jiffies, + .balance_interval = sd_weight, + }; + + /* + * Convert topological properties into behaviour. + */ + + if (sd->flags & SD_SHARE_CPUPOWER) { + sd->imbalance_pct = 110; + sd->smt_gain = 1178; /* ~15% */ + + /* + * Call SMT specific arch topology function. + * This goes away once the powerpc arch uses + * the new interface for scheduler domain + * setup. + */ + sd->flags |= arch_sd_sibling_asym_packing(); + + SD_INIT_NAME(sd, SMT); + } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { + sd->cache_nice_tries = 1; + sd->busy_idx = 2; + + SD_INIT_NAME(sd, MC); +#ifdef CONFIG_NUMA + } else if (sd->flags & SD_NUMA) { + sd->busy_factor = 32, + sd->cache_nice_tries = 2; + sd->busy_idx = 3; + sd->idle_idx = 2; + sd->flags |= SD_SERIALIZE; + if (sched_domains_numa_distance[tl->numa_level] + > RECLAIM_DISTANCE) { + sd->flags &= ~(SD_BALANCE_EXEC | + SD_BALANCE_FORK | + SD_WAKE_AFFINE); + } +#endif + } else { + sd->cache_nice_tries = 1; + sd->busy_idx = 2; + sd->idle_idx = 1; + sd->flags |= SD_PREFER_SIBLING; + + SD_INIT_NAME(sd, CPU); + } + + sd->private = &tl->data; + + return sd; +} + static int __sdt_alloc(const struct cpumask *cpu_map) { struct sched_domain_topology_level *tl; @@ -5795,11 +5817,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *child, int cpu) { - struct sched_domain *sd = tl->init(tl, cpu); + 
struct sched_domain *sd = sd_init(tl, cpu); if (!sd) return child; - cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); + cpumask_and(sched_domain_span(sd), cpu_map, tl->info.mask(cpu)); if (child) { sd->level = child->level + 1; sched_domain_level_max = max(sched_domain_level_max, sd->level); @@ -6138,6 +6160,7 @@ void __init sched_init_smp(void) alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); alloc_cpumask_var(&fallback_doms, GFP_KERNEL); + sched_init_conv(); sched_init_numa(); /* -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/

