* Parth Shah <pa...@linux.ibm.com> [2021-07-28 23:26:05]:

> From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>
> 
> Currently the cacheinfo code on powerpc indexes the "cache" objects
> (modelling the L1/L2/L3 caches) where the key is the device-tree node
> corresponding to that cache. On some of the POWER server platforms
> thread-groups within the core share different sets of caches (Eg: On
> SMT8 POWER9 systems, threads 0,2,4,6 of a core share L1 cache and
> threads 1,3,5,7 of the same core share another L1 cache). On such
> platforms, there is a single device-tree node corresponding to that
> cache and the cache-configuration within the threads of the core is
> indicated via "ibm,thread-groups" device-tree property.
> 
> Since the current code is not aware of the "ibm,thread-groups"
> property, on the aforementioned systems, cacheinfo code still treats
> all the threads in the core to be sharing the cache because of the
> single device-tree node (In the earlier example, the cacheinfo code
> would say CPUs 0-7 share L1 cache).
> 
> In this patch, we make the powerpc cacheinfo code aware of the
> "ibm,thread-groups" property. We index the "cache" objects by the
> key-pair (device-tree node, thread-group id). For any CPUX, for a
> given level of cache, the thread-group id is defined to be the first
> CPU in the "ibm,thread-groups" cache-group containing CPUX. For levels
> of cache which are not represented in "ibm,thread-groups" property,
> the thread-group id is -1.
> 
> Signed-off-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com>
> [parth: Remove "static" keyword for the definition of 
> "thread_group_l1_cache_map"
> and "thread_group_l2_cache_map" to get rid of the compile error.]
> Signed-off-by: Parth Shah <pa...@linux.ibm.com>


Looks good to me.

Reviewed-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>

> ---
>  arch/powerpc/include/asm/smp.h  |  3 ++
>  arch/powerpc/kernel/cacheinfo.c | 80 ++++++++++++++++++++++++---------
>  arch/powerpc/kernel/smp.c       |  4 +-
>  3 files changed, 63 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index 03b3d010cbab..1259040cc3a4 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -33,6 +33,9 @@ extern bool coregroup_enabled;
>  extern int cpu_to_chip_id(int cpu);
>  extern int *chip_id_lookup_table;
> 
> +DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> +DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +
>  #ifdef CONFIG_SMP
> 
>  struct smp_ops_t {
> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
> index 6f903e9aa20b..5a6925d87424 100644
> --- a/arch/powerpc/kernel/cacheinfo.c
> +++ b/arch/powerpc/kernel/cacheinfo.c
> @@ -120,6 +120,7 @@ struct cache {
>       struct cpumask shared_cpu_map; /* online CPUs using this cache */
>       int type;                      /* split cache disambiguation */
>       int level;                     /* level not explicit in device tree */
> +     int group_id;                  /* id of the group of threads that share 
> this cache */
>       struct list_head list;         /* global list of cache objects */
>       struct cache *next_local;      /* next cache of >= level */
>  };
> @@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache 
> *cache)
>  }
> 
>  static void cache_init(struct cache *cache, int type, int level,
> -                    struct device_node *ofnode)
> +                    struct device_node *ofnode, int group_id)
>  {
>       cache->type = type;
>       cache->level = level;
>       cache->ofnode = of_node_get(ofnode);
> +     cache->group_id = group_id;
>       INIT_LIST_HEAD(&cache->list);
>       list_add(&cache->list, &cache_list);
>  }
> 
> -static struct cache *new_cache(int type, int level, struct device_node 
> *ofnode)
> +static struct cache *new_cache(int type, int level,
> +                            struct device_node *ofnode, int group_id)
>  {
>       struct cache *cache;
> 
>       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
>       if (cache)
> -             cache_init(cache, type, level, ofnode);
> +             cache_init(cache, type, level, ofnode, group_id);
> 
>       return cache;
>  }
> @@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct 
> cache *cache)
>               return cache;
> 
>       list_for_each_entry(iter, &cache_list, list)
> -             if (iter->ofnode == cache->ofnode && iter->next_local == cache)
> +             if (iter->ofnode == cache->ofnode &&
> +                 iter->group_id == cache->group_id &&
> +                 iter->next_local == cache)
>                       return iter;
> 
>       return cache;
>  }
> 
> -/* return the first cache on a local list matching node */
> -static struct cache *cache_lookup_by_node(const struct device_node *node)
> +/* return the first cache on a local list matching node and thread-group id 
> */
> +static struct cache *cache_lookup_by_node_group(const struct device_node 
> *node,
> +                                             int group_id)
>  {
>       struct cache *cache = NULL;
>       struct cache *iter;
> 
>       list_for_each_entry(iter, &cache_list, list) {
> -             if (iter->ofnode != node)
> +             if (iter->ofnode != node ||
> +                 iter->group_id != group_id)
>                       continue;
>               cache = cache_find_first_sibling(iter);
>               break;
> @@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node 
> *np)
>               CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
>  }
> 
> -static struct cache *cache_do_one_devnode_unified(struct device_node *node, 
> int level)
> +static struct cache *cache_do_one_devnode_unified(struct device_node *node, 
> int group_id,
> +                                               int level)
>  {
>       pr_debug("creating L%d ucache for %pOFP\n", level, node);
> 
> -     return new_cache(cache_is_unified_d(node), level, node);
> +     return new_cache(cache_is_unified_d(node), level, node, group_id);
>  }
> 
> -static struct cache *cache_do_one_devnode_split(struct device_node *node,
> +static struct cache *cache_do_one_devnode_split(struct device_node *node, 
> int group_id,
>                                               int level)
>  {
>       struct cache *dcache, *icache;
> @@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct 
> device_node *node,
>       pr_debug("creating L%d dcache and icache for %pOFP\n", level,
>                node);
> 
> -     dcache = new_cache(CACHE_TYPE_DATA, level, node);
> -     icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
> +     dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
> +     icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
> 
>       if (!dcache || !icache)
>               goto err;
> @@ -382,31 +390,32 @@ static struct cache *cache_do_one_devnode_split(struct 
> device_node *node,
>       return NULL;
>  }
> 
> -static struct cache *cache_do_one_devnode(struct device_node *node, int 
> level)
> +static struct cache *cache_do_one_devnode(struct device_node *node, int 
> group_id, int level)
>  {
>       struct cache *cache;
> 
>       if (cache_node_is_unified(node))
> -             cache = cache_do_one_devnode_unified(node, level);
> +             cache = cache_do_one_devnode_unified(node, group_id, level);
>       else
> -             cache = cache_do_one_devnode_split(node, level);
> +             cache = cache_do_one_devnode_split(node, group_id, level);
> 
>       return cache;
>  }
> 
>  static struct cache *cache_lookup_or_instantiate(struct device_node *node,
> +                                              int group_id,
>                                                int level)
>  {
>       struct cache *cache;
> 
> -     cache = cache_lookup_by_node(node);
> +     cache = cache_lookup_by_node_group(node, group_id);
> 
>       WARN_ONCE(cache && cache->level != level,
>                 "cache level mismatch on lookup (got %d, expected %d)\n",
>                 cache->level, level);
> 
>       if (!cache)
> -             cache = cache_do_one_devnode(node, level);
> +             cache = cache_do_one_devnode(node, group_id, level);
> 
>       return cache;
>  }
> @@ -443,7 +452,27 @@ static void do_subsidiary_caches_debugcheck(struct cache 
> *cache)
>                 of_node_get_device_type(cache->ofnode));
>  }
> 
> -static void do_subsidiary_caches(struct cache *cache)
> +/*
> + * If sub-groups of threads in a core containing @cpu_id share the
> + * L@level-cache (information obtained via "ibm,thread-groups"
> + * device-tree property), then we identify the group by the first
> + * thread-sibling in the group. We define this to be the group-id.
> + *
> + * In the absence of any thread-group information for L@level-cache,
> + * this function returns -1.
> + */
> +static int get_group_id(unsigned int cpu_id, int level)
> +{
> +     if (has_big_cores && level == 1)
> +             return cpumask_first(per_cpu(thread_group_l1_cache_map,
> +                                          cpu_id));
> +     else if (thread_group_shares_l2 && level == 2)
> +             return cpumask_first(per_cpu(thread_group_l2_cache_map,
> +                                          cpu_id));
> +     return -1;
> +}
> +
> +static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
>  {
>       struct device_node *subcache_node;
>       int level = cache->level;
> @@ -452,9 +481,11 @@ static void do_subsidiary_caches(struct cache *cache)
> 
>       while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
>               struct cache *subcache;
> +             int group_id;
> 
>               level++;
> -             subcache = cache_lookup_or_instantiate(subcache_node, level);
> +             group_id = get_group_id(cpu_id, level);
> +             subcache = cache_lookup_or_instantiate(subcache_node, group_id, 
> level);
>               of_node_put(subcache_node);
>               if (!subcache)
>                       break;
> @@ -468,6 +499,7 @@ static struct cache *cache_chain_instantiate(unsigned int 
> cpu_id)
>  {
>       struct device_node *cpu_node;
>       struct cache *cpu_cache = NULL;
> +     int group_id;
> 
>       pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
> 
> @@ -476,11 +508,13 @@ static struct cache *cache_chain_instantiate(unsigned 
> int cpu_id)
>       if (!cpu_node)
>               goto out;
> 
> -     cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
> +     group_id = get_group_id(cpu_id, 1);
> +
> +     cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
>       if (!cpu_cache)
>               goto out;
> 
> -     do_subsidiary_caches(cpu_cache);
> +     do_subsidiary_caches(cpu_cache, cpu_id);
> 
>       cache_cpu_set(cpu_cache, cpu_id);
>  out:
> @@ -848,13 +882,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int 
> cpu_id)
>  {
>       struct device_node *cpu_node;
>       struct cache *cache;
> +     int group_id;
> 
>       cpu_node = of_get_cpu_node(cpu_id, NULL);
>       WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
>       if (!cpu_node)
>               return NULL;
> 
> -     cache = cache_lookup_by_node(cpu_node);
> +     group_id = get_group_id(cpu_id, 1);
> +     cache = cache_lookup_by_node_group(cpu_node, group_id);
>       of_node_put(cpu_node);
> 
>       return cache;
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 447b78a87c8f..a7fcac44a8e2 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] 
> __initdata;
>   * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
>   * the set its siblings that share the L1-cache.
>   */
> -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> 
>  /*
>   * On some big-cores system, thread_group_l2_cache_map for each CPU
>   * corresponds to the set its siblings within the core that share the
>   * L2-cache.
>   */
> -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> 
>  /* SMP operations for this machine */
>  struct smp_ops_t *smp_ops;
> -- 
> 2.26.3
> 

-- 
Thanks and Regards
Srikar Dronamraju

Reply via email to