Re: [PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping

2015-09-29 Thread Raghavendra K T

On 09/28/2015 11:02 PM, Nishanth Aravamudan wrote:

On 27.09.2015 [23:59:12 +0530], Raghavendra K T wrote:

Create arrays that maps serial nids and sparse chipids.

Note: My original idea had only two arrays of chipid to nid map. Final
code is inspired by driver/acpi/numa.c that maps a proximity node with
a logical node by Takayoshi Kochi , and thus
uses an additional chipid_map nodemask. The mask helps in first unused
nid easily by knowing first unset bit in the mask.

No change in functionality.

Signed-off-by: Raghavendra K T 
---
  arch/powerpc/mm/numa.c | 48 +++-
  1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index dd2073b..f015cad 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -63,6 +63,11 @@ static int form1_affinity;
  static int distance_ref_points_depth;
  static const __be32 *distance_ref_points;
  static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static nodemask_t chipid_map = NODE_MASK_NONE;
+static int chipid_to_nid_map[MAX_NUMNODES]
+   = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };


Hrm, conceptually there are *more* chips than nodes, right? So what
guarantees we won't see > MAX_NUMNODES chips?


You are correct that nid <= chipids.
and #nids = #chipids when all possible slots are populated. Considering
we assume that maximum chip slots are no more than MAX_NUMNODES,


how about having

#define MAX_CHIPNODES MAX_NUMNODES
and
chipid_to_nid_map[MAX_CHIPNODES] = { [0 ... MAX_CHIPNODES - 1] = ..




+static int nid_to_chipid_map[MAX_NUMNODES]
+   = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };

  /*
   * Allocate node_to_cpumask_map based on number of available nodes
@@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned long 
end_pfn,
return 0;
  }

+int chipid_to_nid(int chipid)
+{
+   if (chipid < 0)
+   return NUMA_NO_NODE;


Do you really want to support these cases? Or should they be
bugs/warnings indicating that you got an unexpected input? Or at least
WARN_ON_ONCE?



Right. Querying for nid of an invalid chipid should be atleast
WARN_ON_ONCE(). But 'll check once if there is any valid scenario
before the change.


+   return chipid_to_nid_map[chipid];
+}
+
+int nid_to_chipid(int nid)
+{
+   if (nid < 0)
+   return NUMA_NO_NODE;
+   return nid_to_chipid_map[nid];
+}
+
+static void __map_chipid_to_nid(int chipid, int nid)
+{
+   if (chipid_to_nid_map[chipid] == NUMA_NO_NODE
+|| nid < chipid_to_nid_map[chipid])
+   chipid_to_nid_map[chipid] = nid;
+   if (nid_to_chipid_map[nid] == NUMA_NO_NODE
+   || chipid < nid_to_chipid_map[nid])
+   nid_to_chipid_map[nid] = chipid;
+}


chip <-> node mapping is a static (physical) concept, right? Should we
emit some debugging if for some reason we get a runtime call to remap
an already mapped chip to a new node?



Good point. Already mapped chipid to a different nid is unexpected
whereas mapping chipid to same nid is expected.(because mapping comes
from cpus belonging to same node).

WARN_ON() should suffice here?



+
+int map_chipid_to_nid(int chipid)
+{
+   int nid;
+
+   if (chipid < 0 || chipid >= MAX_NUMNODES)
+   return NUMA_NO_NODE;
+
+   nid = chipid_to_nid_map[chipid];
+   if (nid == NUMA_NO_NODE) {
+   if (nodes_weight(chipid_map) >= MAX_NUMNODES)
+   return NUMA_NO_NODE;


If you create a KVM guest with a bogus topology, doesn't this just start
losing NUMA information for very high-noded guests?



'll try to see if it is possible to hit this case, ideally we should
not allow more than MAX_NUMNODES for chipids and we should abort early.


+   nid = first_unset_node(chipid_map);
+   __map_chipid_to_nid(chipid, nid);
+   node_set(nid, chipid_map);
+   }
+   return nid;
+}
+
  int numa_cpu_lookup(int cpu)
  {
return numa_cpu_lookup_table[cpu];
@@ -264,7 +311,6 @@ out:
return chipid;
  }

-


stray change?



yep, will correct that.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping

2015-09-28 Thread Nishanth Aravamudan
On 27.09.2015 [23:59:12 +0530], Raghavendra K T wrote:
> Create arrays that maps serial nids and sparse chipids.
> 
> Note: My original idea had only two arrays of chipid to nid map. Final
> code is inspired by driver/acpi/numa.c that maps a proximity node with
> a logical node by Takayoshi Kochi , and thus
> uses an additional chipid_map nodemask. The mask helps in first unused
> nid easily by knowing first unset bit in the mask.
> 
> No change in functionality.
> 
> Signed-off-by: Raghavendra K T 
> ---
>  arch/powerpc/mm/numa.c | 48 +++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index dd2073b..f015cad 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -63,6 +63,11 @@ static int form1_affinity;
>  static int distance_ref_points_depth;
>  static const __be32 *distance_ref_points;
>  static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
> +static nodemask_t chipid_map = NODE_MASK_NONE;
> +static int chipid_to_nid_map[MAX_NUMNODES]
> + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };

Hrm, conceptually there are *more* chips than nodes, right? So what
guarantees we won't see > MAX_NUMNODES chips?

> +static int nid_to_chipid_map[MAX_NUMNODES]
> + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
> 
>  /*
>   * Allocate node_to_cpumask_map based on number of available nodes
> @@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned 
> long end_pfn,
>   return 0;
>  }
> 
> +int chipid_to_nid(int chipid)
> +{
> + if (chipid < 0)
> + return NUMA_NO_NODE;

Do you really want to support these cases? Or should they be
bugs/warnings indicating that you got an unexpected input? Or at least
WARN_ON_ONCE?

> + return chipid_to_nid_map[chipid];
> +}
> +
> +int nid_to_chipid(int nid)
> +{
> + if (nid < 0)
> + return NUMA_NO_NODE;
> + return nid_to_chipid_map[nid];
> +}
> +
> +static void __map_chipid_to_nid(int chipid, int nid)
> +{
> + if (chipid_to_nid_map[chipid] == NUMA_NO_NODE
> +  || nid < chipid_to_nid_map[chipid])
> + chipid_to_nid_map[chipid] = nid;
> + if (nid_to_chipid_map[nid] == NUMA_NO_NODE
> + || chipid < nid_to_chipid_map[nid])
> + nid_to_chipid_map[nid] = chipid;
> +}

chip <-> node mapping is a static (physical) concept, right? Should we
emit some debugging if for some reason we get a runtime call to remap
an already mapped chip to a new node?

> +
> +int map_chipid_to_nid(int chipid)
> +{
> + int nid;
> +
> + if (chipid < 0 || chipid >= MAX_NUMNODES)
> + return NUMA_NO_NODE;
> +
> + nid = chipid_to_nid_map[chipid];
> + if (nid == NUMA_NO_NODE) {
> + if (nodes_weight(chipid_map) >= MAX_NUMNODES)
> + return NUMA_NO_NODE;

If you create a KVM guest with a bogus topology, doesn't this just start
losing NUMA information for very high-noded guests?

> + nid = first_unset_node(chipid_map);
> + __map_chipid_to_nid(chipid, nid);
> + node_set(nid, chipid_map);
> + }
> + return nid;
> +}
> +
>  int numa_cpu_lookup(int cpu)
>  {
>   return numa_cpu_lookup_table[cpu];
> @@ -264,7 +311,6 @@ out:
>   return chipid;
>  }
> 
> -

stray change?

>   /* Return the nid from associativity */
>  static int associativity_to_nid(const __be32 *associativity)
>  {
> -- 
> 1.7.11.7
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping

2015-09-27 Thread Raghavendra K T
Create arrays that maps serial nids and sparse chipids.

Note: My original idea had only two arrays of chipid to nid map. Final
code is inspired by driver/acpi/numa.c that maps a proximity node with
a logical node by Takayoshi Kochi , and thus
uses an additional chipid_map nodemask. The mask helps in first unused
nid easily by knowing first unset bit in the mask.

No change in functionality.

Signed-off-by: Raghavendra K T 
---
 arch/powerpc/mm/numa.c | 48 +++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index dd2073b..f015cad 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -63,6 +63,11 @@ static int form1_affinity;
 static int distance_ref_points_depth;
 static const __be32 *distance_ref_points;
 static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static nodemask_t chipid_map = NODE_MASK_NONE;
+static int chipid_to_nid_map[MAX_NUMNODES]
+   = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
+static int nid_to_chipid_map[MAX_NUMNODES]
+   = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
 
 /*
  * Allocate node_to_cpumask_map based on number of available nodes
@@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned long 
end_pfn,
return 0;
 }
 
+int chipid_to_nid(int chipid)
+{
+   if (chipid < 0)
+   return NUMA_NO_NODE;
+   return chipid_to_nid_map[chipid];
+}
+
+int nid_to_chipid(int nid)
+{
+   if (nid < 0)
+   return NUMA_NO_NODE;
+   return nid_to_chipid_map[nid];
+}
+
+static void __map_chipid_to_nid(int chipid, int nid)
+{
+   if (chipid_to_nid_map[chipid] == NUMA_NO_NODE
+|| nid < chipid_to_nid_map[chipid])
+   chipid_to_nid_map[chipid] = nid;
+   if (nid_to_chipid_map[nid] == NUMA_NO_NODE
+   || chipid < nid_to_chipid_map[nid])
+   nid_to_chipid_map[nid] = chipid;
+}
+
+int map_chipid_to_nid(int chipid)
+{
+   int nid;
+
+   if (chipid < 0 || chipid >= MAX_NUMNODES)
+   return NUMA_NO_NODE;
+
+   nid = chipid_to_nid_map[chipid];
+   if (nid == NUMA_NO_NODE) {
+   if (nodes_weight(chipid_map) >= MAX_NUMNODES)
+   return NUMA_NO_NODE;
+   nid = first_unset_node(chipid_map);
+   __map_chipid_to_nid(chipid, nid);
+   node_set(nid, chipid_map);
+   }
+   return nid;
+}
+
 int numa_cpu_lookup(int cpu)
 {
return numa_cpu_lookup_table[cpu];
@@ -264,7 +311,6 @@ out:
return chipid;
 }
 
-
  /* Return the nid from associativity */
 static int associativity_to_nid(const __be32 *associativity)
 {
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev