On Wed, Jul 28, 2021 at 11:26:07PM +0530, Parth Shah wrote: > On POWER10 systems, the "ibm,thread-groups" property "2" indicates that the CPUs > in a thread-group share both the L2 and L3 caches. Hence, use cache_property = 2 > itself to find both the L2 and L3 cache siblings. > Therefore, create a new thread_group_l3_cache_map to keep a list of L3 siblings, > but fill the mask using the same property "2" array.
This version looks good to me. Reviewed-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com> > > Signed-off-by: Parth Shah <pa...@linux.ibm.com> > --- > arch/powerpc/include/asm/smp.h | 3 ++ > arch/powerpc/kernel/cacheinfo.c | 3 ++ > arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++----------- > 3 files changed, 51 insertions(+), 21 deletions(-) > > diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h > index 1259040cc3a4..7ef1cd8168a0 100644 > --- a/arch/powerpc/include/asm/smp.h > +++ b/arch/powerpc/include/asm/smp.h > @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table; > > DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); > DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); > +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); > > #ifdef CONFIG_SMP > > @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu); > > extern bool has_big_cores; > extern bool thread_group_shares_l2; > +extern bool thread_group_shares_l3; > > #define cpu_smt_mask cpu_smt_mask > #ifdef CONFIG_SCHED_SMT > @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu); > #define hard_smp_processor_id() get_hard_smp_processor_id(0) > #define smp_setup_cpu_maps() > #define thread_group_shares_l2 0 > +#define thread_group_shares_l3 0 > static inline void inhibit_secondary_onlining(void) {} > static inline void uninhibit_secondary_onlining(void) {} > static inline const struct cpumask *cpu_sibling_mask(int cpu) > diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c > index 20d91693eac1..cf1be75b7833 100644 > --- a/arch/powerpc/kernel/cacheinfo.c > +++ b/arch/powerpc/kernel/cacheinfo.c > @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) > else if (thread_group_shares_l2 && level == 2) > return cpumask_first(per_cpu(thread_group_l2_cache_map, > cpu_id)); > + else if (thread_group_shares_l3 && level == 3) > + return cpumask_first(per_cpu(thread_group_l3_cache_map, > + cpu_id)); > return -1; > } > > 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index a7fcac44a8e2..f2abd88e0c25 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -78,6 +78,7 @@ struct task_struct *secondary_current; > bool has_big_cores; > bool coregroup_enabled; > bool thread_group_shares_l2; > +bool thread_group_shares_l3; > > DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); > DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); > @@ -101,7 +102,7 @@ enum { > > #define MAX_THREAD_LIST_SIZE 8 > #define THREAD_GROUP_SHARE_L1 1 > -#define THREAD_GROUP_SHARE_L2 2 > +#define THREAD_GROUP_SHARE_L2_L3 2 > struct thread_groups { > unsigned int property; > unsigned int nr_groups; > @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); > */ > DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); > > +/* > + * On P10, thread_group_l3_cache_map for each CPU is equal to the > + * thread_group_l2_cache_map > + */ > +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); > + > /* SMP operations for this machine */ > struct smp_ops_t *smp_ops; > > @@ -889,19 +896,41 @@ static struct thread_groups *__init > get_thread_groups(int cpu, > return tg; > } > > +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct > thread_groups *tg, int cpu, int cpu_group_start) > +{ > + int first_thread = cpu_first_thread_sibling(cpu); > + int i; > + > + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); > + > + for (i = first_thread; i < first_thread + threads_per_core; i++) { > + int i_group_start = get_cpu_thread_group_start(i, tg); > + > + if (unlikely(i_group_start == -1)) { > + WARN_ON_ONCE(1); > + return -ENODATA; > + } > + > + if (i_group_start == cpu_group_start) > + cpumask_set_cpu(i, *mask); > + } > + > + return 0; > +} > + > static int __init init_thread_group_cache_map(int cpu, int cache_property) > > { > - int first_thread = cpu_first_thread_sibling(cpu); > - int i, cpu_group_start = -1, err = 0; > + int 
cpu_group_start = -1, err = 0; > struct thread_groups *tg = NULL; > cpumask_var_t *mask = NULL; > > if (cache_property != THREAD_GROUP_SHARE_L1 && > - cache_property != THREAD_GROUP_SHARE_L2) > + cache_property != THREAD_GROUP_SHARE_L2_L3) > return -EINVAL; > > tg = get_thread_groups(cpu, cache_property, &err); > + > if (!tg) > return err; > > @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, > int cache_property) > return -ENODATA; > } > > - if (cache_property == THREAD_GROUP_SHARE_L1) > + if (cache_property == THREAD_GROUP_SHARE_L1) { > mask = &per_cpu(thread_group_l1_cache_map, cpu); > - else if (cache_property == THREAD_GROUP_SHARE_L2) > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > + } > + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) { > mask = &per_cpu(thread_group_l2_cache_map, cpu); > - > - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); > - > - for (i = first_thread; i < first_thread + threads_per_core; i++) { > - int i_group_start = get_cpu_thread_group_start(i, tg); > - > - if (unlikely(i_group_start == -1)) { > - WARN_ON_ONCE(1); > - return -ENODATA; > - } > - > - if (i_group_start == cpu_group_start) > - cpumask_set_cpu(i, *mask); > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > + mask = &per_cpu(thread_group_l3_cache_map, cpu); > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > } > > + > return 0; > } > > @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void) > has_big_cores = true; > > for_each_possible_cpu(cpu) { > - int err = init_thread_group_cache_map(cpu, > THREAD_GROUP_SHARE_L2); > + int err = init_thread_group_cache_map(cpu, > THREAD_GROUP_SHARE_L2_L3); > > if (err) > return err; > } > > thread_group_shares_l2 = true; > - pr_debug("L2 cache only shared by the threads in the small core\n"); > + thread_group_shares_l3 = true; > + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); > + > return 0; > } > > -- > 2.26.3 
>