smp: Add support detecting thread-groups sharing L2 cache

Srikar Dronamraju Mon, 07 Dec 2020 04:43:00 -0800

* Gautham R. Shenoy <e...@linux.vnet.ibm.com> [2020-12-04 10:18:46]:

> From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>
> 
> On POWER systems, groups of threads within a core sharing the L2-cache
> can be indicated by the "ibm,thread-groups" property array with the
> identifier "2".
> 
> This patch adds support for detecting this, and when present, populate
> the populating the cpu_l2_cache_mask of every CPU to the core-siblings
> which share L2 with the CPU as specified in the by the
> "ibm,thread-groups" property array.
> 
> On a platform with the following "ibm,thread-group" configuration
>                00000001 00000002 00000004 00000000
>                00000002 00000004 00000006 00000001
>                00000003 00000005 00000007 00000002
>                00000002 00000004 00000000 00000002
>                00000004 00000006 00000001 00000003
>                00000005 00000007
> 
> Without this patch, the sched-domain hierarchy for CPUs 0,1 would be
>       CPU0 attaching sched-domain(s):
>       domain-0: span=0,2,4,6 level=SMT
>       domain-1: span=0-7 level=CACHE
>       domain-2: span=0-15,24-39,48-55 level=MC
>       domain-3: span=0-55 level=DIE
> 
>       CPU1 attaching sched-domain(s):
>       domain-0: span=1,3,5,7 level=SMT
>       domain-1: span=0-7 level=CACHE
>       domain-2: span=0-15,24-39,48-55 level=MC
>       domain-3: span=0-55 level=DIE
> 
> The CACHE domain at 0-7 is incorrect since the ibm,thread-groups
> sub-array
> [00000002 00000002 00000004
>  00000000 00000002 00000004 00000006
>  00000001 00000003 00000005 00000007]
> indicates that L2 (Property "2") is shared only between the threads of a 
> single
> group. There are "2" groups of threads where each group contains "4"
> threads each. The groups being {0,2,4,6} and {1,3,5,7}.
> 
> With this patch, the sched-domain hierarchy for CPUs 0,1 would be
>       CPU0 attaching sched-domain(s):
>       domain-0: span=0,2,4,6 level=SMT
>       domain-1: span=0-15,24-39,48-55 level=MC
>       domain-2: span=0-55 level=DIE
> 
>       CPU1 attaching sched-domain(s):
>       domain-0: span=1,3,5,7 level=SMT
>       domain-1: span=0-15,24-39,48-55 level=MC
>       domain-2: span=0-55 level=DIE
> 
> The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1
> resp.) gets degenerated into the SMT domain. Furthermore, the
> last-level-cache domain gets correctly set to the SMT sched-domain.
> 
> Signed-off-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/smp.c | 66 
> +++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 58 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 6a242a3..a116d2d 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -76,6 +76,7 @@
>  struct task_struct *secondary_current;
>  bool has_big_cores;
>  bool coregroup_enabled;
> +bool thread_group_shares_l2;


Either keep this as static in this patch or add its declaration

> 
>  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
>  DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
> @@ -99,6 +100,7 @@ enum {
> 
>  #define MAX_THREAD_LIST_SIZE 8
>  #define THREAD_GROUP_SHARE_L1   1
> +#define THREAD_GROUP_SHARE_L2   2
>  struct thread_groups {
>       unsigned int property;
>       unsigned int nr_groups;
> @@ -107,7 +109,7 @@ struct thread_groups {
>  };
> 
>  /* Maximum number of properties that groups of threads within a core can 
> share */
> -#define MAX_THREAD_GROUP_PROPERTIES 1
> +#define MAX_THREAD_GROUP_PROPERTIES 2
> 
>  struct thread_groups_list {
>       unsigned int nr_properties;
> @@ -121,6 +123,13 @@ struct thread_groups_list {
>   */
>  DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
> 
> +/*
> + * On some big-cores system, thread_group_l2_cache_map for each CPU
> + * corresponds to the set its siblings within the core that share the
> + * L2-cache.
> + */
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +

NIT:
We are trying to confuse ourselves with the names.
For L1 we have cpu_l2_cache_map to store the tasks from the thread group.
but cpu_smallcore_map for keeping track of tasks.

For L2 we have thread_group_l2_cache_map to store the tasks from the thread
group.  but cpu_l2_cache_map for keeping track of tasks.

I think we should do some renaming to keep the names consistent.
I would say probably say move the current cpu_l2_cache_map to
cpu_llc_cache_map and move the new aka  thread_group_l2_cache_map as
cpu_l2_cache_map to be somewhat consistent.

>  /* SMP operations for this machine */
>  struct smp_ops_t *smp_ops;
> 
> @@ -840,7 +851,8 @@ static int init_cpu_cache_map(int cpu, unsigned int 
> cache_property)
>       if (!dn)
>               return -ENODATA;
> 
> -     if (!(cache_property == THREAD_GROUP_SHARE_L1))
> +     if (!(cache_property == THREAD_GROUP_SHARE_L1 ||
> +           cache_property == THREAD_GROUP_SHARE_L2))
>               return -EINVAL;
> 
>       if (!cpu_tgl->nr_properties) {
> @@ -867,7 +879,10 @@ static int init_cpu_cache_map(int cpu, unsigned int 
> cache_property)
>               goto out;
>       }
> 
> -     mask = &per_cpu(cpu_l1_cache_map, cpu);
> +     if (cache_property == THREAD_GROUP_SHARE_L1)
> +             mask = &per_cpu(cpu_l1_cache_map, cpu);
> +     else if (cache_property == THREAD_GROUP_SHARE_L2)
> +             mask = &per_cpu(thread_group_l2_cache_map, cpu);
> 
>       zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> 
> @@ -973,6 +988,16 @@ static int init_big_cores(void)
>       }
> 
>       has_big_cores = true;
> +
> +     for_each_possible_cpu(cpu) {
> +             int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L2);
> +
> +             if (err)
> +                     return err;
> +     }
> +
> +     thread_group_shares_l2 = true;

Why do we need a separate loop. Why cant we merge this in the above loop
itself? 

> +     pr_info("Thread-groups in a core share L2-cache\n");

Can this be moved to a pr_debug? Does it help any regular user/admins to
know if thread-groups shared l2 cache. Infact it may confuse users on what
thread groups are and which thread groups dont share cache.
I would prefer some other name than thread_group_shares_l2 but dont know any
better alternatives and may be my choices are even worse.

>       return 0;
>  }
> 
> @@ -1287,6 +1312,31 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t 
> *mask)
>       if (has_big_cores)
>               submask_fn = cpu_smallcore_mask;
> 
> +

NIT: extra blank line?

> +     /*
> +      * If the threads in a thread-group share L2 cache, then then
> +      * the L2-mask can be obtained from thread_group_l2_cache_map.
> +      */
> +     if (thread_group_shares_l2) {
> +             /* Siblings that share L1 is a subset of siblings that share 
> L2.*/
> +             or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
> +             if (*mask) {
> +                     cpumask_andnot(*mask,
> +                                    per_cpu(thread_group_l2_cache_map, cpu),
> +                                    cpu_l2_cache_mask(cpu));
> +             } else {
> +                     mask = &per_cpu(thread_group_l2_cache_map, cpu);
> +             }
> +
> +             for_each_cpu(i, *mask) {
> +                     if (!cpu_online(i))
> +                             continue;
> +                     set_cpus_related(i, cpu, cpu_l2_cache_mask);
> +             }
> +
> +             return true;
> +     }
> +

Ah this can be simplified to:
if (thread_group_shares_l2) {
        cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));

        for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
                if (cpu_online(i))
                        set_cpus_related(i, cpu, cpu_l2_cache_mask);
        }
}

No?

>       l2_cache = cpu_to_l2cache(cpu);
>       if (!l2_cache || !*mask) {
>               /* Assume only core siblings share cache with this CPU */

-- 
Thanks and Regards
Srikar Dronamraju

Re: [PATCH 2/3] powerpc/smp: Add support detecting thread-groups sharing L2 cache

Reply via email to