Re: [PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping
On 09/28/2015 11:02 PM, Nishanth Aravamudan wrote: On 27.09.2015 [23:59:12 +0530], Raghavendra K T wrote: Create arrays that maps serial nids and sparse chipids. Note: My original idea had only two arrays of chipid to nid map. Final code is inspired by driver/acpi/numa.c that maps a proximity node with a logical node by Takayoshi Kochi , and thus uses an additional chipid_map nodemask. The mask helps in first unused nid easily by knowing first unset bit in the mask. No change in functionality. Signed-off-by: Raghavendra K T --- arch/powerpc/mm/numa.c | 48 +++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dd2073b..f015cad 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -63,6 +63,11 @@ static int form1_affinity; static int distance_ref_points_depth; static const __be32 *distance_ref_points; static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; +static nodemask_t chipid_map = NODE_MASK_NONE; +static int chipid_to_nid_map[MAX_NUMNODES] + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; Hrm, conceptually there are *more* chips than nodes, right? So what guarantees we won't see > MAX_NUMNODES chips? You are correct that nid <= chipids. and #nids = #chipids when all possible slots are populated. Considering we assume that maximum chip slots are no more than MAX_NUMNODES, how about having #define MAX_CHIPNODES MAX_NUMNODES and chipid_to_nid_map[MAX_CHIPNODES] = { [0 ... MAX_CHIPNODES - 1] = .. +static int nid_to_chipid_map[MAX_NUMNODES] + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; /* * Allocate node_to_cpumask_map based on number of available nodes @@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } +int chipid_to_nid(int chipid) +{ + if (chipid < 0) + return NUMA_NO_NODE; Do you really want to support these cases? Or should they be bugs/warnings indicating that you got an unexpected input? Or at least WARN_ON_ONCE? Right. Querying for nid of an invalid chipid should be atleast WARN_ON_ONCE(). But 'll check once if there is any valid scenario before the change. + return chipid_to_nid_map[chipid]; +} + +int nid_to_chipid(int nid) +{ + if (nid < 0) + return NUMA_NO_NODE; + return nid_to_chipid_map[nid]; +} + +static void __map_chipid_to_nid(int chipid, int nid) +{ + if (chipid_to_nid_map[chipid] == NUMA_NO_NODE +|| nid < chipid_to_nid_map[chipid]) + chipid_to_nid_map[chipid] = nid; + if (nid_to_chipid_map[nid] == NUMA_NO_NODE + || chipid < nid_to_chipid_map[nid]) + nid_to_chipid_map[nid] = chipid; +} chip <-> node mapping is a static (physical) concept, right? Should we emit some debugging if for some reason we get a runtime call to remap an already mapped chip to a new node? Good point. Already mapped chipid to a different nid is unexpected whereas mapping chipid to same nid is expected.(because mapping comes from cpus belonging to same node). WARN_ON() should suffice here? + +int map_chipid_to_nid(int chipid) +{ + int nid; + + if (chipid < 0 || chipid >= MAX_NUMNODES) + return NUMA_NO_NODE; + + nid = chipid_to_nid_map[chipid]; + if (nid == NUMA_NO_NODE) { + if (nodes_weight(chipid_map) >= MAX_NUMNODES) + return NUMA_NO_NODE; If you create a KVM guest with a bogus topology, doesn't this just start losing NUMA information for very high-noded guests? 'll try to see if it is possible to hit this case, ideally we should not allow more than MAX_NUMNODES for chipids and we should abort early. + nid = first_unset_node(chipid_map); + __map_chipid_to_nid(chipid, nid); + node_set(nid, chipid_map); + } + return nid; +} + int numa_cpu_lookup(int cpu) { return numa_cpu_lookup_table[cpu]; @@ -264,7 +311,6 @@ out: return chipid; } - stray change? yep, will correct that. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping
On 27.09.2015 [23:59:12 +0530], Raghavendra K T wrote: > Create arrays that maps serial nids and sparse chipids. > > Note: My original idea had only two arrays of chipid to nid map. Final > code is inspired by driver/acpi/numa.c that maps a proximity node with > a logical node by Takayoshi Kochi , and thus > uses an additional chipid_map nodemask. The mask helps in first unused > nid easily by knowing first unset bit in the mask. > > No change in functionality. > > Signed-off-by: Raghavendra K T > --- > arch/powerpc/mm/numa.c | 48 +++- > 1 file changed, 47 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c > index dd2073b..f015cad 100644 > --- a/arch/powerpc/mm/numa.c > +++ b/arch/powerpc/mm/numa.c > @@ -63,6 +63,11 @@ static int form1_affinity; > static int distance_ref_points_depth; > static const __be32 *distance_ref_points; > static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; > +static nodemask_t chipid_map = NODE_MASK_NONE; > +static int chipid_to_nid_map[MAX_NUMNODES] > + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; Hrm, conceptually there are *more* chips than nodes, right? So what guarantees we won't see > MAX_NUMNODES chips? > +static int nid_to_chipid_map[MAX_NUMNODES] > + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; > > /* > * Allocate node_to_cpumask_map based on number of available nodes > @@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned > long end_pfn, > return 0; > } > > +int chipid_to_nid(int chipid) > +{ > + if (chipid < 0) > + return NUMA_NO_NODE; Do you really want to support these cases? Or should they be bugs/warnings indicating that you got an unexpected input? Or at least WARN_ON_ONCE? > + return chipid_to_nid_map[chipid]; > +} > + > +int nid_to_chipid(int nid) > +{ > + if (nid < 0) > + return NUMA_NO_NODE; > + return nid_to_chipid_map[nid]; > +} > + > +static void __map_chipid_to_nid(int chipid, int nid) > +{ > + if (chipid_to_nid_map[chipid] == NUMA_NO_NODE > + || nid < chipid_to_nid_map[chipid]) > + chipid_to_nid_map[chipid] = nid; > + if (nid_to_chipid_map[nid] == NUMA_NO_NODE > + || chipid < nid_to_chipid_map[nid]) > + nid_to_chipid_map[nid] = chipid; > +} chip <-> node mapping is a static (physical) concept, right? Should we emit some debugging if for some reason we get a runtime call to remap an already mapped chip to a new node? > + > +int map_chipid_to_nid(int chipid) > +{ > + int nid; > + > + if (chipid < 0 || chipid >= MAX_NUMNODES) > + return NUMA_NO_NODE; > + > + nid = chipid_to_nid_map[chipid]; > + if (nid == NUMA_NO_NODE) { > + if (nodes_weight(chipid_map) >= MAX_NUMNODES) > + return NUMA_NO_NODE; If you create a KVM guest with a bogus topology, doesn't this just start losing NUMA information for very high-noded guests? > + nid = first_unset_node(chipid_map); > + __map_chipid_to_nid(chipid, nid); > + node_set(nid, chipid_map); > + } > + return nid; > +} > + > int numa_cpu_lookup(int cpu) > { > return numa_cpu_lookup_table[cpu]; > @@ -264,7 +311,6 @@ out: > return chipid; > } > > - stray change? > /* Return the nid from associativity */ > static int associativity_to_nid(const __be32 *associativity) > { > -- > 1.7.11.7 > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RFC 4/5] powerpc:numa Add helper functions to maintain chipid to nid mapping
Create arrays that maps serial nids and sparse chipids. Note: My original idea had only two arrays of chipid to nid map. Final code is inspired by driver/acpi/numa.c that maps a proximity node with a logical node by Takayoshi Kochi , and thus uses an additional chipid_map nodemask. The mask helps in first unused nid easily by knowing first unset bit in the mask. No change in functionality. Signed-off-by: Raghavendra K T --- arch/powerpc/mm/numa.c | 48 +++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dd2073b..f015cad 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -63,6 +63,11 @@ static int form1_affinity; static int distance_ref_points_depth; static const __be32 *distance_ref_points; static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; +static nodemask_t chipid_map = NODE_MASK_NONE; +static int chipid_to_nid_map[MAX_NUMNODES] + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; +static int nid_to_chipid_map[MAX_NUMNODES] + = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; /* * Allocate node_to_cpumask_map based on number of available nodes @@ -133,6 +138,48 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } +int chipid_to_nid(int chipid) +{ + if (chipid < 0) + return NUMA_NO_NODE; + return chipid_to_nid_map[chipid]; +} + +int nid_to_chipid(int nid) +{ + if (nid < 0) + return NUMA_NO_NODE; + return nid_to_chipid_map[nid]; +} + +static void __map_chipid_to_nid(int chipid, int nid) +{ + if (chipid_to_nid_map[chipid] == NUMA_NO_NODE +|| nid < chipid_to_nid_map[chipid]) + chipid_to_nid_map[chipid] = nid; + if (nid_to_chipid_map[nid] == NUMA_NO_NODE + || chipid < nid_to_chipid_map[nid]) + nid_to_chipid_map[nid] = chipid; +} + +int map_chipid_to_nid(int chipid) +{ + int nid; + + if (chipid < 0 || chipid >= MAX_NUMNODES) + return NUMA_NO_NODE; + + nid = chipid_to_nid_map[chipid]; + if (nid == NUMA_NO_NODE) { + if (nodes_weight(chipid_map) >= MAX_NUMNODES) + return NUMA_NO_NODE; + nid = first_unset_node(chipid_map); + __map_chipid_to_nid(chipid, nid); + node_set(nid, chipid_map); + } + return nid; +} + int numa_cpu_lookup(int cpu) { return numa_cpu_lookup_table[cpu]; @@ -264,7 +311,6 @@ out: return chipid; } - /* Return the nid from associativity */ static int associativity_to_nid(const __be32 *associativity) { -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev