Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain
On Mon, Jul 20, 2020 at 11:49:11AM +0530, Srikar Dronamraju wrote: > * Gautham R Shenoy [2020-07-17 12:07:55]: > > > On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote: > > > Currently "CACHE" domain happens to be the 2nd sched domain as per > > > powerpc_topology. This domain will collapse if cpumask of l2-cache is > > > same as SMT domain. However we could generalize this domain such that it > > > could mean either be a "CACHE" domain or a "BIGCORE" domain. > > > > > > While setting up the "CACHE" domain, check if shared_cache is already > > > set. > > > > > > Cc: linuxppc-dev > > > Cc: Michael Ellerman > > > Cc: Nick Piggin > > > Cc: Oliver OHalloran > > > Cc: Nathan Lynch > > > Cc: Michael Neuling > > > Cc: Anton Blanchard > > > Cc: Gautham R Shenoy > > > Cc: Vaidyanathan Srinivasan > > > Signed-off-by: Srikar Dronamraju > > > --- > > > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int > > > cpu) > > > } > > > #endif > > > > > > +static const struct cpumask *cpu_bigcore_mask(int cpu) > > > +{ > > > + return cpu_core_mask(cpu); > > > > It should be cpu_smt_mask() if we want the redundant big-core to be > > degenerated in favour of the SMT level on P8, no? Because > > cpu_core_mask refers to all the CPUs that are in the same chip. > > > > Right, but it cant be cpu_smt_mask since cpu_smt_mask is only enabled in > CONFIG_SCHED_SMT. I was looking at using sibling_map, but we have to careful > for power9 / PowerNV mode. Guess that should be fine. Ok. 
> > > > +} > > > + > > > static struct sched_domain_topology_level powerpc_topology[] = { > > > #ifdef CONFIG_SCHED_SMT > > > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, > > > #endif > > > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, > > > + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) }, > > > { cpu_cpu_mask, SD_INIT_NAME(DIE) }, > > > { NULL, }, > > > }; > > > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu) > > > void start_secondary(void *unused) > > > { > > > unsigned int cpu = smp_processor_id(); > > > - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > > > > > mmgrab(_mm); > > > current->active_mm = _mm; > > > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused) > > > /* Update topology CPU masks */ > > > add_cpu_to_masks(cpu); > > > > > > - if (has_big_cores) > > > - sibling_mask = cpu_smallcore_mask; > > > /* > > >* Check for any shared caches. Note that this must be done on a > > >* per-core basis because one core in the pair might be disabled. > > >*/ > > > - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) > > > - shared_caches = true; > > > + if (!shared_caches) { > > > + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > > + struct cpumask *mask = cpu_l2_cache_mask(cpu); > > > + > > > + if (has_big_cores) > > > + sibling_mask = cpu_smallcore_mask; > > > + > > > + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) > > > + shared_caches = true; > > > > Shouldn't we use cpumask_subset() here ? > > Wouldn't cpumask_subset should return 1 if both are same? When are caches shared ? When the sibling_mask(cpu) is a strict-subset of cpu_l2_cache_mask(cpu). cpumask_weight() only checks if the number of CPUs in cpu_l2_cache_mask(cpu) is greater than sibling_mask(cpu) but not if constituent CPUs of the former forms a strict superset of the latter. 
We are better off using if (!cpumask_equal(sibling_mask(cpu), mask) && cpumask_subset(sibling_mask(cpu), mask)) which is accurate. > We dont want to have shared_caches set if both the masks are equal. > > > > > > + } > > > > > > set_numa_node(numa_cpu_lookup_table[cpu]); > > > set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); > > > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus) > > > smp_ops->bringup_done(); > > > > > > dump_numa_cpu_topology(); > > > + if (shared_caches) { > > > + pr_info("Using shared cache scheduler topology\n"); > > > + powerpc_topology[bigcore_idx].mask = shared_cache_mask; > > > +#ifdef CONFIG_SCHED_DEBUG > > > + powerpc_topology[bigcore_idx].name = "CACHE"; > > > +#endif > > > + powerpc_topology[bigcore_idx].sd_flags = > > > powerpc_shared_cache_flags; > > > + } > > > > > > I would much rather that we have all the topology-fixups done in one > > function. > > > > fixup_topology(void) { > > if (has_big_core) > > powerpc_topology[smt_idx].mask = smallcore_smt_mask; > > > > if (shared_caches) { > >const char *name = "CACHE"; > >powerpc_topology[bigcore_idx].mask = shared_cache_mask; > >strlcpy(powerpc_topology[bigcore_idx].name, name, > > strlen(name)); > >powerpc_topology[bigcore_idx].sd_flags =
Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain
* Gautham R Shenoy [2020-07-17 12:07:55]: > On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote: > > Currently "CACHE" domain happens to be the 2nd sched domain as per > > powerpc_topology. This domain will collapse if cpumask of l2-cache is > > same as SMT domain. However we could generalize this domain such that it > > could mean either be a "CACHE" domain or a "BIGCORE" domain. > > > > While setting up the "CACHE" domain, check if shared_cache is already > > set. > > > > Cc: linuxppc-dev > > Cc: Michael Ellerman > > Cc: Nick Piggin > > Cc: Oliver OHalloran > > Cc: Nathan Lynch > > Cc: Michael Neuling > > Cc: Anton Blanchard > > Cc: Gautham R Shenoy > > Cc: Vaidyanathan Srinivasan > > Signed-off-by: Srikar Dronamraju > > --- > > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int > > cpu) > > } > > #endif > > > > +static const struct cpumask *cpu_bigcore_mask(int cpu) > > +{ > > + return cpu_core_mask(cpu); > > It should be cpu_smt_mask() if we want the redundant big-core to be > degenerated in favour of the SMT level on P8, no? Because > cpu_core_mask refers to all the CPUs that are in the same chip. > Right, but it can't be cpu_smt_mask since cpu_smt_mask is only enabled in CONFIG_SCHED_SMT. I was looking at using sibling_map, but we have to be careful for power9 / PowerNV mode. Guess that should be fine. 
> > +} > > + > > static struct sched_domain_topology_level powerpc_topology[] = { > > #ifdef CONFIG_SCHED_SMT > > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, > > #endif > > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, > > + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) }, > > { cpu_cpu_mask, SD_INIT_NAME(DIE) }, > > { NULL, }, > > }; > > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu) > > void start_secondary(void *unused) > > { > > unsigned int cpu = smp_processor_id(); > > - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > > > mmgrab(&init_mm); > > current->active_mm = &init_mm; > > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused) > > /* Update topology CPU masks */ > > add_cpu_to_masks(cpu); > > > > - if (has_big_cores) > > - sibling_mask = cpu_smallcore_mask; > > /* > > * Check for any shared caches. Note that this must be done on a > > * per-core basis because one core in the pair might be disabled. > > */ > > - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) > > - shared_caches = true; > > + if (!shared_caches) { > > + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > + struct cpumask *mask = cpu_l2_cache_mask(cpu); > > + > > + if (has_big_cores) > > + sibling_mask = cpu_smallcore_mask; > > + > > + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) > > + shared_caches = true; > > Shouldn't we use cpumask_subset() here ? Wouldn't cpumask_subset() return 1 if both are the same? We don't want to have shared_caches set if both the masks are equal. 
> > > + } > > > > set_numa_node(numa_cpu_lookup_table[cpu]); > > set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); > > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus) > > smp_ops->bringup_done(); > > > > dump_numa_cpu_topology(); > > + if (shared_caches) { > > + pr_info("Using shared cache scheduler topology\n"); > > + powerpc_topology[bigcore_idx].mask = shared_cache_mask; > > +#ifdef CONFIG_SCHED_DEBUG > > + powerpc_topology[bigcore_idx].name = "CACHE"; > > +#endif > > + powerpc_topology[bigcore_idx].sd_flags = > > powerpc_shared_cache_flags; > > + } > > > I would much rather that we have all the topology-fixups done in one > function. > > fixup_topology(void) { > if (has_big_core) > powerpc_topology[smt_idx].mask = smallcore_smt_mask; > > if (shared_caches) { >const char *name = "CACHE"; >powerpc_topology[bigcore_idx].mask = shared_cache_mask; >strlcpy(powerpc_topology[bigcore_idx].name, name, > strlen(name)); >powerpc_topology[bigcore_idx].sd_flags = powerpc_shared_cache_flags; > } > > /* Any other changes to the topology structure here */ We could do this. > > And also as an optimization, get rid of degenerate structures here > itself so that we don't pay additional penalty while building the > sched-domains each time. > Yes this is definitely in plan, but slightly later in time. Thanks for the review and comments. -- Thanks and Regards Srikar Dronamraju
Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain
On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote: > Currently "CACHE" domain happens to be the 2nd sched domain as per > powerpc_topology. This domain will collapse if cpumask of l2-cache is > same as SMT domain. However we could generalize this domain such that it > could mean either be a "CACHE" domain or a "BIGCORE" domain. > > While setting up the "CACHE" domain, check if shared_cache is already > set. > > Cc: linuxppc-dev > Cc: Michael Ellerman > Cc: Nick Piggin > Cc: Oliver OHalloran > Cc: Nathan Lynch > Cc: Michael Neuling > Cc: Anton Blanchard > Cc: Gautham R Shenoy > Cc: Vaidyanathan Srinivasan > Signed-off-by: Srikar Dronamraju > --- > arch/powerpc/kernel/smp.c | 48 +++ > 1 file changed, 34 insertions(+), 14 deletions(-) > > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index 875f57e41355..f8faf75135af 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -85,6 +85,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); > EXPORT_PER_CPU_SYMBOL(cpu_core_map); > EXPORT_SYMBOL_GPL(has_big_cores); > > +enum { > +#ifdef CONFIG_SCHED_SMT > + smt_idx, > +#endif > + bigcore_idx, > + die_idx, > +}; > + > #define MAX_THREAD_LIST_SIZE 8 > #define THREAD_GROUP_SHARE_L1 1 > struct thread_groups { > @@ -851,13 +859,7 @@ static int powerpc_shared_cache_flags(void) > */ > static const struct cpumask *shared_cache_mask(int cpu) > { > - if (shared_caches) > - return cpu_l2_cache_mask(cpu); > - > - if (has_big_cores) > - return cpu_smallcore_mask(cpu); > - > - return cpu_smt_mask(cpu); > + return per_cpu(cpu_l2_cache_map, cpu); > } > > #ifdef CONFIG_SCHED_SMT > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int cpu) > } > #endif > > +static const struct cpumask *cpu_bigcore_mask(int cpu) > +{ > + return cpu_core_mask(cpu); It should be cpu_smt_mask() if we want the redundant big-core to be degenerated in favour of the SMT level on P8, no? 
Because cpu_core_mask refers to all the CPUs that are in the same chip. > +} > + > static struct sched_domain_topology_level powerpc_topology[] = { > #ifdef CONFIG_SCHED_SMT > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, > #endif > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, > + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) }, > { cpu_cpu_mask, SD_INIT_NAME(DIE) }, > { NULL, }, > }; > @@ -895,7 +902,7 @@ static int init_big_cores(void) > > #ifdef CONFIG_SCHED_SMT > pr_info("Big cores detected. Using small core scheduling\n"); > - powerpc_topology[0].mask = smallcore_smt_mask; > + powerpc_topology[smt_idx].mask = smallcore_smt_mask; > #endif > > return 0; > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu) > void start_secondary(void *unused) > { > unsigned int cpu = smp_processor_id(); > - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > mmgrab(&init_mm); > current->active_mm = &init_mm; > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused) > /* Update topology CPU masks */ > add_cpu_to_masks(cpu); > > - if (has_big_cores) > - sibling_mask = cpu_smallcore_mask; > /* >* Check for any shared caches. Note that this must be done on a >* per-core basis because one core in the pair might be disabled. >*/ > - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) > - shared_caches = true; > + if (!shared_caches) { > + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > + struct cpumask *mask = cpu_l2_cache_mask(cpu); > + > + if (has_big_cores) > + sibling_mask = cpu_smallcore_mask; > + > + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) > + shared_caches = true; Shouldn't we use cpumask_subset() here? 
> + } > > set_numa_node(numa_cpu_lookup_table[cpu]); > set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus) > smp_ops->bringup_done(); > > dump_numa_cpu_topology(); > + if (shared_caches) { > + pr_info("Using shared cache scheduler topology\n"); > + powerpc_topology[bigcore_idx].mask = shared_cache_mask; > +#ifdef CONFIG_SCHED_DEBUG > + powerpc_topology[bigcore_idx].name = "CACHE"; > +#endif > + powerpc_topology[bigcore_idx].sd_flags = > powerpc_shared_cache_flags; > + } I would much rather that we have all the topology-fixups done in one function. fixup_topology(void) { if (has_big_core) powerpc_topology[smt_idx].mask = smallcore_smt_mask; if (shared_caches) { const char *name = "CACHE"; powerpc_topology[bigcore_idx].mask =
[PATCH 06/11] powerpc/smp: Generalize 2nd sched domain
Currently the "CACHE" domain happens to be the 2nd sched domain as per powerpc_topology. This domain will collapse if the cpumask of the l2-cache is the same as the SMT domain. However, we could generalize this domain such that it could mean either a "CACHE" domain or a "BIGCORE" domain. While setting up the "CACHE" domain, check if shared_caches is already set. Cc: linuxppc-dev Cc: Michael Ellerman Cc: Nick Piggin Cc: Oliver OHalloran Cc: Nathan Lynch Cc: Michael Neuling Cc: Anton Blanchard Cc: Gautham R Shenoy Cc: Vaidyanathan Srinivasan Signed-off-by: Srikar Dronamraju --- arch/powerpc/kernel/smp.c | 48 +++ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 875f57e41355..f8faf75135af 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -85,6 +85,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); +enum { +#ifdef CONFIG_SCHED_SMT + smt_idx, +#endif + bigcore_idx, + die_idx, +}; + #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 struct thread_groups { @@ -851,13 +859,7 @@ static int powerpc_shared_cache_flags(void) */ static const struct cpumask *shared_cache_mask(int cpu) { - if (shared_caches) - return cpu_l2_cache_mask(cpu); - - if (has_big_cores) - return cpu_smallcore_mask(cpu); - - return cpu_smt_mask(cpu); + return per_cpu(cpu_l2_cache_map, cpu); } #ifdef CONFIG_SCHED_SMT @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int cpu) } #endif +static const struct cpumask *cpu_bigcore_mask(int cpu) +{ + return cpu_core_mask(cpu); +} + static struct sched_domain_topology_level powerpc_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -895,7 +902,7 @@ static int 
init_big_cores(void) #ifdef CONFIG_SCHED_SMT pr_info("Big cores detected. Using small core scheduling\n"); - powerpc_topology[0].mask = smallcore_smt_mask; + powerpc_topology[smt_idx].mask = smallcore_smt_mask; #endif return 0; @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu) void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; mmgrab(&init_mm); current->active_mm = &init_mm; @@ -1345,14 +1351,20 @@ void start_secondary(void *unused) /* Update topology CPU masks */ add_cpu_to_masks(cpu); - if (has_big_cores) - sibling_mask = cpu_smallcore_mask; /* * Check for any shared caches. Note that this must be done on a * per-core basis because one core in the pair might be disabled. */ - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) - shared_caches = true; + if (!shared_caches) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + struct cpumask *mask = cpu_l2_cache_mask(cpu); + + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) + shared_caches = true; + } set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->bringup_done(); dump_numa_cpu_topology(); + if (shared_caches) { + pr_info("Using shared cache scheduler topology\n"); + powerpc_topology[bigcore_idx].mask = shared_cache_mask; +#ifdef CONFIG_SCHED_DEBUG + powerpc_topology[bigcore_idx].name = "CACHE"; +#endif + powerpc_topology[bigcore_idx].sd_flags = powerpc_shared_cache_flags; + } set_sched_topology(powerpc_topology); } -- 2.17.1