On Mon, Jun 28, 2021 at 08:41:15PM +0530, Aneesh Kumar K.V wrote:
> The associativity details of the newly added resources are collected from
> the hypervisor via "ibm,configure-connector" rtas call. Update the numa
> distance details of the newly added numa node after the above call.
> 
> Instead of updating NUMA distance every time we lookup a node id
> from the associativity property, add helpers that can be used
> during boot which does this only once. Also remove the distance
> update from node id lookup helpers.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
> ---
>  arch/powerpc/mm/numa.c                        | 173 +++++++++++++-----
>  arch/powerpc/platforms/pseries/hotplug-cpu.c  |   2 +
>  .../platforms/pseries/hotplug-memory.c        |   2 +
>  arch/powerpc/platforms/pseries/pseries.h      |   1 +
>  4 files changed, 132 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 0ec16999beef..7b142f79d600 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -208,22 +208,6 @@ int __node_distance(int a, int b)
>  }
>  EXPORT_SYMBOL(__node_distance);
>  
> -static void initialize_distance_lookup_table(int nid,
> -             const __be32 *associativity)
> -{
> -     int i;
> -
> -     if (affinity_form != FORM1_AFFINITY)
> -             return;
> -
> -     for (i = 0; i < max_associativity_domain_index; i++) {
> -             const __be32 *entry;
> -
> -             entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> -             distance_lookup_table[nid][i] = of_read_number(entry, 1);
> -     }
> -}
> -
>  /*
>   * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
>   * info is found.
> @@ -241,15 +225,6 @@ static int associativity_to_nid(const __be32 
> *associativity)
>       /* POWER4 LPAR uses 0xffff as invalid node */
>       if (nid == 0xffff || nid >= nr_node_ids)
>               nid = NUMA_NO_NODE;
> -
> -     if (nid > 0 &&
> -             of_read_number(associativity, 1) >= 
> max_associativity_domain_index) {
> -             /*
> -              * Skip the length field and send start of associativity array
> -              */
> -             initialize_distance_lookup_table(nid, associativity + 1);
> -     }
> -
>  out:
>       return nid;
>  }
> @@ -287,6 +262,49 @@ int of_node_to_nid(struct device_node *device)
>  }
>  EXPORT_SYMBOL(of_node_to_nid);
>  
> +static void __initialize_form1_numa_distance(const __be32 *associativity)
> +{
> +     int i, nid;
> +
> +     if (affinity_form != FORM1_AFFINITY)

Since this shouldn't be called on a !form1 system, this could be a WARN_ON().

> +             return;
> +
> +     if (of_read_number(associativity, 1) >= primary_domain_index) {
> +             nid = of_read_number(&associativity[primary_domain_index], 1);

This computes the nid from the assoc array independently of
associativity_to_nid(), which doesn't seem like a good idea.  Wouldn't
it be better to call associativity_to_nid(), then make the next bit
conditional on nid != NUMA_NO_NODE?

> +
> +             for (i = 0; i < max_associativity_domain_index; i++) {
> +                     const __be32 *entry;
> +
> +                     entry = 
> &associativity[be32_to_cpu(distance_ref_points[i])];
> +                     distance_lookup_table[nid][i] = of_read_number(entry, 
> 1);
> +             }
> +     }
> +}
> +
> +static void initialize_form1_numa_distance(struct device_node *node)
> +{
> +     const __be32 *associativity;
> +
> +     associativity = of_get_associativity(node);
> +     if (!associativity)
> +             return;
> +
> +     __initialize_form1_numa_distance(associativity);
> +}
> +
> +/*
> + * Used to update distance information w.r.t newly added node.
> + */
> +void update_numa_distance(struct device_node *node)
> +{
> +     if (affinity_form == FORM0_AFFINITY)
> +             return;
> +     else if (affinity_form == FORM1_AFFINITY) {
> +             initialize_form1_numa_distance(node);
> +             return;
> +     }
> +}
> +
>  static int __init find_primary_domain_index(void)
>  {
>       int index;
> @@ -433,6 +451,48 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
>       return 0;
>  }
>  
> +static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
> +{
> +     struct assoc_arrays aa = { .arrays = NULL };
> +     int default_nid = NUMA_NO_NODE;
> +     int nid = default_nid;
> +     int rc, index;
> +
> +     if ((primary_domain_index < 0) || !numa_enabled)

Under what circumstances could you get primary_domain_index < 0?

> +             return default_nid;
> +
> +     rc = of_get_assoc_arrays(&aa);
> +     if (rc)
> +             return default_nid;
> +
> +     if (primary_domain_index <= aa.array_sz &&
> +         !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < 
> aa.n_arrays) {
> +             index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;

Does anywhere verify that primary_domain_index <= aa.array_sz?

> +             nid = of_read_number(&aa.arrays[index], 1);
> +
> +             if (nid == 0xffff || nid >= nr_node_ids)
> +                     nid = default_nid;
> +             if (nid > 0 && affinity_form == FORM1_AFFINITY) {
> +                     int i;
> +                     const __be32 *associativity;
> +
> +                     index = lmb->aa_index * aa.array_sz;
> +                     associativity = &aa.arrays[index];
> +                     /*
> +                      * lookup array associativity entries have different 
> format
> +                      * There is no length of the array as the first element.

The difference is very small, and this is not a hot path.  Couldn't
you reduce a chunk of code by prepending aa.array_sz, then re-using
__initialize_form1_numa_distance()?  Or even by making
__initialize_form1_numa_distance() take the length as a parameter?

> +                      */
> +                     for (i = 0; i < max_associativity_domain_index; i++) {
> +                             const __be32 *entry;
> +
> +                             entry = 
> &associativity[be32_to_cpu(distance_ref_points[i]) - 1];

Does anywhere verify that distance_ref_points[i] <= aa.array_sz for
every i?

> +                             distance_lookup_table[nid][i] = 
> of_read_number(entry, 1);
> +                     }
> +             }
> +     }
> +     return nid;
> +}
> +
>  /*
>   * This is like of_node_to_nid_single() for memory represented in the
>   * ibm,dynamic-reconfiguration-memory node.
> @@ -458,21 +518,14 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  
>               if (nid == 0xffff || nid >= nr_node_ids)
>                       nid = default_nid;
> -
> -             if (nid > 0) {
> -                     index = lmb->aa_index * aa.array_sz;
> -                     initialize_distance_lookup_table(nid,
> -                                                     &aa.arrays[index]);
> -             }
>       }
> -
>       return nid;
>  }
>  
>  #ifdef CONFIG_PPC_SPLPAR
> -static int vphn_get_nid(long lcpu)
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
>  {
> -     __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>       long rc, hwid;
>  
>       /*
> @@ -492,10 +545,22 @@ static int vphn_get_nid(long lcpu)
>  
>               rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
>               if (rc == H_SUCCESS)
> -                     return associativity_to_nid(associativity);
> +                     return 0;
>       }
>  
> +     return -1;
> +}
> +
> +static int vphn_get_nid(long lcpu)
> +{
> +     __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> +
> +
> +     if (!__vphn_get_associativity(lcpu, associativity))
> +             return associativity_to_nid(associativity);
> +
>       return NUMA_NO_NODE;
> +
>  }
>  #else
>  static int vphn_get_nid(long unused)
> @@ -692,7 +757,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb 
> *lmb,
>                       size = read_n_cells(n_mem_size_cells, usm);
>               }
>  
> -             nid = of_drconf_to_nid_single(lmb);
> +             nid = get_nid_and_numa_distance(lmb);
>               fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
>                                         &nid);
>               node_set_online(nid);
> @@ -709,6 +774,7 @@ static int __init parse_numa_properties(void)
>       struct device_node *memory;
>       int default_nid = 0;
>       unsigned long i;
> +     const __be32 *associativity;
>  
>       if (numa_enabled == 0) {
>               printk(KERN_WARNING "NUMA disabled by user\n");
> @@ -734,18 +800,30 @@ static int __init parse_numa_properties(void)
>        * each node to be onlined must have NODE_DATA etc backing it.
>        */
>       for_each_present_cpu(i) {
> +             __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
>               struct device_node *cpu;
> -             int nid = vphn_get_nid(i);
> +             int nid = NUMA_NO_NODE;
>  
> -             /*
> -              * Don't fall back to default_nid yet -- we will plug
> -              * cpus into nodes once the memory scan has discovered
> -              * the topology.
> -              */
> -             if (nid == NUMA_NO_NODE) {
> +             memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));

What's the memset() for?  AFAICT you only look at vphn_assoc in the
branch where __vphn_get_associativity() succeeds.

> +
> +             if (__vphn_get_associativity(i, vphn_assoc) == 0) {
> +                     nid = associativity_to_nid(vphn_assoc);
> +                     __initialize_form1_numa_distance(vphn_assoc);
> +             } else {
> +
> +                     /*
> +                      * Don't fall back to default_nid yet -- we will plug
> +                      * cpus into nodes once the memory scan has discovered
> +                      * the topology.
> +                      */
>                       cpu = of_get_cpu_node(i, NULL);
>                       BUG_ON(!cpu);
> -                     nid = of_node_to_nid_single(cpu);
> +
> +                     associativity = of_get_associativity(cpu);
> +                     if (associativity) {
> +                             nid = associativity_to_nid(associativity);
> +                             __initialize_form1_numa_distance(associativity);
> +                     }
>                       of_node_put(cpu);
>               }
>  
> @@ -781,8 +859,11 @@ static int __init parse_numa_properties(void)
>                * have associativity properties.  If none, then
>                * everything goes to default_nid.
>                */
> -             nid = of_node_to_nid_single(memory);
> -             if (nid < 0)
> +             associativity = of_get_associativity(memory);
> +             if (associativity) {
> +                     nid = associativity_to_nid(associativity);
> +                     __initialize_form1_numa_distance(associativity);
> +             } else
>                       nid = default_nid;
>  
>               fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7e970f81d8ff..778b6ab35f0d 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -498,6 +498,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>               return saved_rc;
>       }
>  
> +     update_numa_distance(dn);
> +
>       rc = dlpar_online_cpu(dn);
>       if (rc) {
>               saved_rc = rc;
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 36f66556a7c6..40d350f31a34 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct 
> drmem_lmb *lmb)
>               return -ENODEV;
>       }
>  
> +     update_numa_distance(lmb_node);
> +
>       dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
>       if (!dr_node) {
>               dlpar_free_cc_nodes(lmb_node);
> diff --git a/arch/powerpc/platforms/pseries/pseries.h 
> b/arch/powerpc/platforms/pseries/pseries.h
> index 1f051a786fb3..663a0859cf13 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -113,4 +113,5 @@ extern u32 pseries_security_flavor;
>  void pseries_setup_security_mitigations(void);
>  void pseries_lpar_read_hblkrm_characteristics(void);
>  
> +void update_numa_distance(struct device_node *node);
>  #endif /* _PSERIES_PSERIES_H */

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to