RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Stefano, > -Original Message- > From: Stefano Stabellini > Sent: 2021年9月9日 6:32 > To: Wei Chen > Cc: Julien Grall ; Stefano Stabellini > ; xen-devel@lists.xenproject.org; Bertrand Marquis > ; Jan Beulich > Subject: RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > On Wed, 8 Sep 2021, Wei Chen wrote: > > > > >>> @@ -55,6 +57,79 @@ static int __init > > > > >> dtb_numa_processor_affinity_init(nodeid_t node) > > > > >>> return 0; > > > > >>> } > > > > >>> > > > > >>> +/* Callback for parsing of the memory regions affinity */ > > > > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > > > > >>> +paddr_t start, paddr_t size) > > > > >>> +{ > > > > >>> +struct node *nd; > > > > >>> +paddr_t end; > > > > >>> +int i; > > > > >>> + > > > > >>> +if ( srat_disabled() ) > > > > >>> +return -EINVAL; > > > > >>> + > > > > >>> +end = start + size; > > > > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > > > > >>> +{ > > > > >>> +dprintk(XENLOG_WARNING, > > > > >>> +"Too many numa entry, try bigger > NR_NODE_MEMBLKS > > > \n"); > > > > >>> +bad_srat(); > > > > >>> +return -EINVAL; > > > > >>> +} > > > > >>> + > > > > >>> +/* It is fine to add this area to the nodes data it will be > > > used > > > > >> later */ > > > > >>> +i = conflicting_memblks(start, end); > > > > >>> +/* No conflicting memory block, we can save it for later > usage > > > */; > > > > >>> +if ( i < 0 ) > > > > >>> +goto save_memblk; > > > > >>> + > > > > >>> +if ( memblk_nodeid[i] == node ) { > > > > >>> +/* > > > > >>> + * Overlaps with other memblk in the same node, warning > > > here. > > > > >>> + * This memblk will be merged with conflicted memblk > later. 
> > > > >>> + */ > > > > >>> +printk(XENLOG_WARNING > > > > >>> + "DT: NUMA NODE %u (%"PRIx64 > > > > >>> + "-%"PRIx64") overlaps with itself (%"PRIx64"- > > > > >> %"PRIx64")\n", > > > > >>> + node, start, end, > > > > >>> + node_memblk_range[i].start, > > > node_memblk_range[i].end); > > > > >>> +} else { > > > > >>> +/* > > > > >>> + * Conflict with memblk in other node, this is an error. > > > > >>> + * The NUMA information is invalid, NUMA will be turn > off. > > > > >>> + */ > > > > >>> +printk(XENLOG_ERR > > > > >>> + "DT: NUMA NODE %u (%"PRIx64"-%" > > > > >>> + PRIx64") overlaps with NODE %u (%"PRIx64"- > > > > %"PRIx64")\n", > > > > >>> + node, start, end, memblk_nodeid[i], > > > > >>> + node_memblk_range[i].start, > > > node_memblk_range[i].end); > > > > >>> +bad_srat(); > > > > >>> +return -EINVAL; > > > > >>> +} > > > > >>> + > > > > >>> +save_memblk: > > > > >>> +nd = &nodes[node]; > > > > >>> +if ( !node_test_and_set(node, memory_nodes_parsed) ) { > > > > >>> +nd->start = start; > > > > >>> +nd->end = end; > > > > >>> +} else { > > > > >>> +if ( start < nd->start ) > > > > >>> +nd->start = start; > > > > >>> +if ( nd->end < end ) > > > > >>> +nd->end = end; &
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
On Wed, 8 Sep 2021, Wei Chen wrote: > > > >>> @@ -55,6 +57,79 @@ static int __init > > > >> dtb_numa_processor_affinity_init(nodeid_t node) > > > >>> return 0; > > > >>> } > > > >>> > > > >>> +/* Callback for parsing of the memory regions affinity */ > > > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > > > >>> +paddr_t start, paddr_t size) > > > >>> +{ > > > >>> +struct node *nd; > > > >>> +paddr_t end; > > > >>> +int i; > > > >>> + > > > >>> +if ( srat_disabled() ) > > > >>> +return -EINVAL; > > > >>> + > > > >>> +end = start + size; > > > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > > > >>> +{ > > > >>> +dprintk(XENLOG_WARNING, > > > >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS > > \n"); > > > >>> +bad_srat(); > > > >>> +return -EINVAL; > > > >>> +} > > > >>> + > > > >>> +/* It is fine to add this area to the nodes data it will be > > used > > > >> later */ > > > >>> +i = conflicting_memblks(start, end); > > > >>> +/* No conflicting memory block, we can save it for later usage > > */; > > > >>> +if ( i < 0 ) > > > >>> +goto save_memblk; > > > >>> + > > > >>> +if ( memblk_nodeid[i] == node ) { > > > >>> +/* > > > >>> + * Overlaps with other memblk in the same node, warning > > here. > > > >>> + * This memblk will be merged with conflicted memblk later. > > > >>> + */ > > > >>> +printk(XENLOG_WARNING > > > >>> + "DT: NUMA NODE %u (%"PRIx64 > > > >>> + "-%"PRIx64") overlaps with itself (%"PRIx64"- > > > >> %"PRIx64")\n", > > > >>> + node, start, end, > > > >>> + node_memblk_range[i].start, > > node_memblk_range[i].end); > > > >>> +} else { > > > >>> +/* > > > >>> + * Conflict with memblk in other node, this is an error. > > > >>> + * The NUMA information is invalid, NUMA will be turn off. 
> > > >>> + */ > > > >>> +printk(XENLOG_ERR > > > >>> + "DT: NUMA NODE %u (%"PRIx64"-%" > > > >>> + PRIx64") overlaps with NODE %u (%"PRIx64"- > > > %"PRIx64")\n", > > > >>> + node, start, end, memblk_nodeid[i], > > > >>> + node_memblk_range[i].start, > > node_memblk_range[i].end); > > > >>> +bad_srat(); > > > >>> +return -EINVAL; > > > >>> +} > > > >>> + > > > >>> +save_memblk: > > > >>> +nd = &nodes[node]; > > > >>> +if ( !node_test_and_set(node, memory_nodes_parsed) ) { > > > >>> +nd->start = start; > > > >>> +nd->end = end; > > > >>> +} else { > > > >>> +if ( start < nd->start ) > > > >>> +nd->start = start; > > > >>> +if ( nd->end < end ) > > > >>> +nd->end = end; > > > >>> +} > > > >>> + > > > >>> +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", > > > >>> + node, start, end); > > > >>> + > > > >>> +node_memblk_range[num_node_memblks].start = start; > > > >>> +node_memblk_range[num_node_memblks].end = end; > > > >>> +memblk_nodeid[num_node_memblks] = node; > > > >>> +num_node_memblks++; > > > >> > > > >> > > > >> Is it possible to have non-contigous ranges of memory for a single > > NUMA > > > >> node? > > > >> > > > >> Looking at the DT bindings and Linux implementation, it seems > > possible. > > > >> Here, it seems that node_memblk_range/memblk_nodeid could handle it, > > > >> but nodes couldn't. > > > > > > > > Yes, you're right. I copied this code for x86 ACPI NUMA. Does ACPI > > allow > > > > non-contiguous ranges of memory for a single NUMA node too? > > > > > > I couldn't find any restriction for ACPI. Although, I only briefly > > > looked at the spec. > > > > > > > If yes, I think > > > > this will affect x86 ACPI NUMA too. In next version, we plan to merge > > > > dtb_numa_memory_affinity_init and acpi_numa_memory_affinity_init into > > a > > > > neutral function. So we can fix them at the same time. > > > > > > > > If not, maybe we have to keep the diversity for dtb and ACPI here. > > > > > > I am not entirely sure what you mean. 
Are you saying if ACPI doesn't > > > allow non-contiguous ranges of memory, then we should keep the > > > implementation separated? > > > > > > If so, then I disagree with that. It is fine to have code that supports > > > more than what a firmware table supports. The main benefit is less code > > > and therefore less long term maintenance (with the current solution we > > > would need to check both the ACPI and DT implementation if there is a > > > bug in one). > > > > > > > Yes, I agree. > > > > I am looking for some methods to address this comment. Current "nodes" > has not considered the situation of memory addresses of different NUMA > nodes can be interleaved. > > This code exists in x86 NUMA implementation. I think it may be based on > one early version of Linux x86
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Julien, Stefano, Jan > -Original Message- > From: Xen-devel On Behalf Of Wei > Chen > Sent: 2021年8月28日 21:58 > To: Julien Grall ; Stefano Stabellini > > Cc: xen-devel@lists.xenproject.org; Bertrand Marquis > ; Jan Beulich > Subject: RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > Hi Julien, > > > -Original Message- > > From: Julien Grall > > Sent: 2021年8月28日 18:34 > > To: Wei Chen ; Stefano Stabellini > > > > Cc: xen-devel@lists.xenproject.org; Bertrand Marquis > > ; Jan Beulich > > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > > device tree memory node > > > > Hi Wei, > > > > On 28/08/2021 04:56, Wei Chen wrote: > > >> -Original Message- > > >> From: Stefano Stabellini > > >> Sent: 2021年8月28日 9:06 > > >> To: Wei Chen > > >> Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; > > jul...@xen.org; > > >> jbeul...@suse.com; Bertrand Marquis > > >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to > parse > > >> device tree memory node > > >> > > >> On Wed, 11 Aug 2021, Wei Chen wrote: > > >>> Memory blocks' NUMA ID information is stored in device tree's > > >>> memory nodes as "numa-node-id". We need a new helper to parse > > >>> and verify this ID from memory nodes. > > >>> > > >>> In order to support memory affinity in later use, the valid > > >>> memory ranges and NUMA ID will be saved to tables. 
> > >>> > > >>> Signed-off-by: Wei Chen > > >>> --- > > >>> xen/arch/arm/numa_device_tree.c | 130 > > > > >>> 1 file changed, 130 insertions(+) > > >>> > > >>> diff --git a/xen/arch/arm/numa_device_tree.c > > >> b/xen/arch/arm/numa_device_tree.c > > >>> index 37cc56acf3..bbe081dcd1 100644 > > >>> --- a/xen/arch/arm/numa_device_tree.c > > >>> +++ b/xen/arch/arm/numa_device_tree.c > > >>> @@ -20,11 +20,13 @@ > > >>> #include > > >>> #include > > >>> #include > > >>> +#include > > >>> #include > > >>> #include > > >>> > > >>> s8 device_tree_numa = 0; > > >>> static nodemask_t processor_nodes_parsed __initdata; > > >>> +static nodemask_t memory_nodes_parsed __initdata; > > >>> > > >>> static int srat_disabled(void) > > >>> { > > >>> @@ -55,6 +57,79 @@ static int __init > > >> dtb_numa_processor_affinity_init(nodeid_t node) > > >>> return 0; > > >>> } > > >>> > > >>> +/* Callback for parsing of the memory regions affinity */ > > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > > >>> +paddr_t start, paddr_t size) > > >>> +{ > > >>> +struct node *nd; > > >>> +paddr_t end; > > >>> +int i; > > >>> + > > >>> +if ( srat_disabled() ) > > >>> +return -EINVAL; > > >>> + > > >>> +end = start + size; > > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > > >>> +{ > > >>> +dprintk(XENLOG_WARNING, > > >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS > \n"); > > >>> +bad_srat(); > > >>> +return -EINVAL; > > >>> +} > > >>> + > > >>> +/* It is fine to add this area to the nodes data it will be > used > > >> later */ > > >>> +i = conflicting_memblks(start, end); > > >>> +/* No conflicting memory block, we can save it for later usage > */; > > >>> +if ( i < 0 ) > > >>> +goto save_memblk; > > >>> + > > >>> +if ( memblk_nodeid[i] == node ) { > > >>> +/* > > >>> + * Overlaps with other memblk in the same node, warning > here. > > >>> + * This memblk will be merged with conflicted memblk later. > > >>> + */ > > >>> +printk(XENLOG_W
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Julien, > -Original Message- > From: Julien Grall > Sent: 2021年8月28日 18:34 > To: Wei Chen ; Stefano Stabellini > > Cc: xen-devel@lists.xenproject.org; Bertrand Marquis > ; Jan Beulich > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > Hi Wei, > > On 28/08/2021 04:56, Wei Chen wrote: > >> -Original Message- > >> From: Stefano Stabellini > >> Sent: 2021年8月28日 9:06 > >> To: Wei Chen > >> Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; > jul...@xen.org; > >> jbeul...@suse.com; Bertrand Marquis > >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > >> device tree memory node > >> > >> On Wed, 11 Aug 2021, Wei Chen wrote: > >>> Memory blocks' NUMA ID information is stored in device tree's > >>> memory nodes as "numa-node-id". We need a new helper to parse > >>> and verify this ID from memory nodes. > >>> > >>> In order to support memory affinity in later use, the valid > >>> memory ranges and NUMA ID will be saved to tables. 
> >>> > >>> Signed-off-by: Wei Chen > >>> --- > >>> xen/arch/arm/numa_device_tree.c | 130 > > >>> 1 file changed, 130 insertions(+) > >>> > >>> diff --git a/xen/arch/arm/numa_device_tree.c > >> b/xen/arch/arm/numa_device_tree.c > >>> index 37cc56acf3..bbe081dcd1 100644 > >>> --- a/xen/arch/arm/numa_device_tree.c > >>> +++ b/xen/arch/arm/numa_device_tree.c > >>> @@ -20,11 +20,13 @@ > >>> #include > >>> #include > >>> #include > >>> +#include > >>> #include > >>> #include > >>> > >>> s8 device_tree_numa = 0; > >>> static nodemask_t processor_nodes_parsed __initdata; > >>> +static nodemask_t memory_nodes_parsed __initdata; > >>> > >>> static int srat_disabled(void) > >>> { > >>> @@ -55,6 +57,79 @@ static int __init > >> dtb_numa_processor_affinity_init(nodeid_t node) > >>> return 0; > >>> } > >>> > >>> +/* Callback for parsing of the memory regions affinity */ > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > >>> +paddr_t start, paddr_t size) > >>> +{ > >>> +struct node *nd; > >>> +paddr_t end; > >>> +int i; > >>> + > >>> +if ( srat_disabled() ) > >>> +return -EINVAL; > >>> + > >>> +end = start + size; > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > >>> +{ > >>> +dprintk(XENLOG_WARNING, > >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); > >>> +bad_srat(); > >>> +return -EINVAL; > >>> +} > >>> + > >>> +/* It is fine to add this area to the nodes data it will be used > >> later */ > >>> +i = conflicting_memblks(start, end); > >>> +/* No conflicting memory block, we can save it for later usage */; > >>> +if ( i < 0 ) > >>> +goto save_memblk; > >>> + > >>> +if ( memblk_nodeid[i] == node ) { > >>> +/* > >>> + * Overlaps with other memblk in the same node, warning here. > >>> + * This memblk will be merged with conflicted memblk later. 
> >>> + */ > >>> +printk(XENLOG_WARNING > >>> + "DT: NUMA NODE %u (%"PRIx64 > >>> + "-%"PRIx64") overlaps with itself (%"PRIx64"- > >> %"PRIx64")\n", > >>> + node, start, end, > >>> + node_memblk_range[i].start, node_memblk_range[i].end); > >>> +} else { > >>> +/* > >>> + * Conflict with memblk in other node, this is an error. > >>> + * The NUMA information is invalid, NUMA will be turn off. > >>> + */ > >>> +printk(XENLOG_ERR > >>> + "DT: NUMA NODE %u (%"PRIx64"-%" > >>> + PRIx64") overlaps with NODE %u (%"PRIx64"- > %"PRIx64"
Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Wei, On 28/08/2021 04:56, Wei Chen wrote: -Original Message- From: Stefano Stabellini Sent: 2021年8月28日 9:06 To: Wei Chen Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; jul...@xen.org; jbeul...@suse.com; Bertrand Marquis Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node On Wed, 11 Aug 2021, Wei Chen wrote: Memory blocks' NUMA ID information is stored in device tree's memory nodes as "numa-node-id". We need a new helper to parse and verify this ID from memory nodes. In order to support memory affinity in later use, the valid memory ranges and NUMA ID will be saved to tables. Signed-off-by: Wei Chen --- xen/arch/arm/numa_device_tree.c | 130 1 file changed, 130 insertions(+) diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c index 37cc56acf3..bbe081dcd1 100644 --- a/xen/arch/arm/numa_device_tree.c +++ b/xen/arch/arm/numa_device_tree.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include s8 device_tree_numa = 0; static nodemask_t processor_nodes_parsed __initdata; +static nodemask_t memory_nodes_parsed __initdata; static int srat_disabled(void) { @@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t node) return 0; } +/* Callback for parsing of the memory regions affinity */ +static int __init dtb_numa_memory_affinity_init(nodeid_t node, +paddr_t start, paddr_t size) +{ +struct node *nd; +paddr_t end; +int i; + +if ( srat_disabled() ) +return -EINVAL; + +end = start + size; +if ( num_node_memblks >= NR_NODE_MEMBLKS ) +{ +dprintk(XENLOG_WARNING, +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); +bad_srat(); +return -EINVAL; +} + +/* It is fine to add this area to the nodes data it will be used later */ +i = conflicting_memblks(start, end); +/* No conflicting memory block, we can save it for later usage */; +if ( i < 0 ) +goto save_memblk; + +if ( memblk_nodeid[i] == node ) { +/* + * Overlaps with other memblk in the same node, 
warning here. + * This memblk will be merged with conflicted memblk later. + */ +printk(XENLOG_WARNING + "DT: NUMA NODE %u (%"PRIx64 + "-%"PRIx64") overlaps with itself (%"PRIx64"- %"PRIx64")\n", + node, start, end, + node_memblk_range[i].start, node_memblk_range[i].end); +} else { +/* + * Conflict with memblk in other node, this is an error. + * The NUMA information is invalid, NUMA will be turn off. + */ +printk(XENLOG_ERR + "DT: NUMA NODE %u (%"PRIx64"-%" + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", + node, start, end, memblk_nodeid[i], + node_memblk_range[i].start, node_memblk_range[i].end); +bad_srat(); +return -EINVAL; +} + +save_memblk: +nd = &nodes[node]; +if ( !node_test_and_set(node, memory_nodes_parsed) ) { +nd->start = start; +nd->end = end; +} else { +if ( start < nd->start ) +nd->start = start; +if ( nd->end < end ) +nd->end = end; +} + +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", + node, start, end); + +node_memblk_range[num_node_memblks].start = start; +node_memblk_range[num_node_memblks].end = end; +memblk_nodeid[num_node_memblks] = node; +num_node_memblks++; Is it possible to have non-contigous ranges of memory for a single NUMA node? Looking at the DT bindings and Linux implementation, it seems possible. Here, it seems that node_memblk_range/memblk_nodeid could handle it, but nodes couldn't. Yes, you're right. I copied this code for x86 ACPI NUMA. Does ACPI allow non-contiguous ranges of memory for a single NUMA node too? I couldn't find any restriction for ACPI. Although, I only briefly looked at the spec. If yes, I think this will affect x86 ACPI NUMA too. In next version, we plan to merge dtb_numa_memory_affinity_init and acpi_numa_memory_affinity_init into a neutral function. So we can fix them at the same time. If not, maybe we have to keep the diversity for dtb and ACPI here. I am not entirely sure what you mean. 
Are you saying if ACPI doesn't allow non-contiguous ranges of memory, then we should keep the implementation separated? If so, then I disagree with that. It is fine to have code that supports more than what a firmware table supports. The main benefit is less code and therefore less long term maintenance (with the current solution we would need to check both the ACPI and DT implementation if there is a bug in one). Cheers, -- Julien Grall
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Stefano, > -Original Message- > From: Stefano Stabellini > Sent: 2021年8月28日 9:06 > To: Wei Chen > Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; jul...@xen.org; > jbeul...@suse.com; Bertrand Marquis > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > On Wed, 11 Aug 2021, Wei Chen wrote: > > Memory blocks' NUMA ID information is stored in device tree's > > memory nodes as "numa-node-id". We need a new helper to parse > > and verify this ID from memory nodes. > > > > In order to support memory affinity in later use, the valid > > memory ranges and NUMA ID will be saved to tables. > > > > Signed-off-by: Wei Chen > > --- > > xen/arch/arm/numa_device_tree.c | 130 > > 1 file changed, 130 insertions(+) > > > > diff --git a/xen/arch/arm/numa_device_tree.c > b/xen/arch/arm/numa_device_tree.c > > index 37cc56acf3..bbe081dcd1 100644 > > --- a/xen/arch/arm/numa_device_tree.c > > +++ b/xen/arch/arm/numa_device_tree.c > > @@ -20,11 +20,13 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > s8 device_tree_numa = 0; > > static nodemask_t processor_nodes_parsed __initdata; > > +static nodemask_t memory_nodes_parsed __initdata; > > > > static int srat_disabled(void) > > { > > @@ -55,6 +57,79 @@ static int __init > dtb_numa_processor_affinity_init(nodeid_t node) > > return 0; > > } > > > > +/* Callback for parsing of the memory regions affinity */ > > +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > > +paddr_t start, paddr_t size) > > +{ > > +struct node *nd; > > +paddr_t end; > > +int i; > > + > > +if ( srat_disabled() ) > > +return -EINVAL; > > + > > +end = start + size; > > +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > > +{ > > +dprintk(XENLOG_WARNING, > > +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); > > +bad_srat(); > > +return -EINVAL; > > +} > > + > > +/* It is fine to add this area to the nodes data it will be used > later */ > > +i = 
conflicting_memblks(start, end); > > +/* No conflicting memory block, we can save it for later usage */; > > +if ( i < 0 ) > > +goto save_memblk; > > + > > +if ( memblk_nodeid[i] == node ) { > > +/* > > + * Overlaps with other memblk in the same node, warning here. > > + * This memblk will be merged with conflicted memblk later. > > + */ > > +printk(XENLOG_WARNING > > + "DT: NUMA NODE %u (%"PRIx64 > > + "-%"PRIx64") overlaps with itself (%"PRIx64"- > %"PRIx64")\n", > > + node, start, end, > > + node_memblk_range[i].start, node_memblk_range[i].end); > > +} else { > > +/* > > + * Conflict with memblk in other node, this is an error. > > + * The NUMA information is invalid, NUMA will be turn off. > > + */ > > +printk(XENLOG_ERR > > + "DT: NUMA NODE %u (%"PRIx64"-%" > > + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", > > + node, start, end, memblk_nodeid[i], > > + node_memblk_range[i].start, node_memblk_range[i].end); > > +bad_srat(); > > +return -EINVAL; > > +} > > + > > +save_memblk: > > +nd = &nodes[node]; > > +if ( !node_test_and_set(node, memory_nodes_parsed) ) { > > +nd->start = start; > > +nd->end = end; > > +} else { > > +if ( start < nd->start ) > > +nd->start = start; > > +if ( nd->end < end ) > > +nd->end = end; > > +} > > + > > +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", > > + node, start, end); > > + > > +node_memblk_range[num_node_memblks].start = start; > > +node_memblk_range[num_node_memblks].end = end; > > +memblk_nodeid[num_node_memblks] = node; > > +num_node_memblks++; > > > Is it possible to have non-contigous ranges of memory for a single NUMA > node? > > Looking at the DT bindings and Linux implementation, it seems possible. > Here, it seems that node_memblk_range/memblk_nodeid could handle it, > but nodes couldn't. Yes, you're right. I copied this code for x86 ACPI NUMA. Does ACPI allow non-contiguous ranges of memory for a single NUMA node too? If yes, I think this will affect x86 ACPI NUMA too. 
In next version, we plan to merge dtb_numa_memory_affinity_init and acpi_numa_memory_affinity_init into a neutral function. So we can fix them at the same time. If not, maybe we have to keep the diversity for dtb and ACPI here. Anyway, Thanks for pointing this, I will look into the latest Linux implementation. Cheers, Wei Chen
Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
On Wed, 11 Aug 2021, Wei Chen wrote: > Memory blocks' NUMA ID information is stored in device tree's > memory nodes as "numa-node-id". We need a new helper to parse > and verify this ID from memory nodes. > > In order to support memory affinity in later use, the valid > memory ranges and NUMA ID will be saved to tables. > > Signed-off-by: Wei Chen > --- > xen/arch/arm/numa_device_tree.c | 130 > 1 file changed, 130 insertions(+) > > diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c > index 37cc56acf3..bbe081dcd1 100644 > --- a/xen/arch/arm/numa_device_tree.c > +++ b/xen/arch/arm/numa_device_tree.c > @@ -20,11 +20,13 @@ > #include > #include > #include > +#include > #include > #include > > s8 device_tree_numa = 0; > static nodemask_t processor_nodes_parsed __initdata; > +static nodemask_t memory_nodes_parsed __initdata; > > static int srat_disabled(void) > { > @@ -55,6 +57,79 @@ static int __init > dtb_numa_processor_affinity_init(nodeid_t node) > return 0; > } > > +/* Callback for parsing of the memory regions affinity */ > +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > +paddr_t start, paddr_t size) > +{ > +struct node *nd; > +paddr_t end; > +int i; > + > +if ( srat_disabled() ) > +return -EINVAL; > + > +end = start + size; > +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > +{ > +dprintk(XENLOG_WARNING, > +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); > +bad_srat(); > +return -EINVAL; > +} > + > +/* It is fine to add this area to the nodes data it will be used later */ > +i = conflicting_memblks(start, end); > +/* No conflicting memory block, we can save it for later usage */; > +if ( i < 0 ) > +goto save_memblk; > + > +if ( memblk_nodeid[i] == node ) { > +/* > + * Overlaps with other memblk in the same node, warning here. > + * This memblk will be merged with conflicted memblk later. 
> + */ > +printk(XENLOG_WARNING > + "DT: NUMA NODE %u (%"PRIx64 > + "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n", > + node, start, end, > + node_memblk_range[i].start, node_memblk_range[i].end); > +} else { > +/* > + * Conflict with memblk in other node, this is an error. > + * The NUMA information is invalid, NUMA will be turn off. > + */ > +printk(XENLOG_ERR > + "DT: NUMA NODE %u (%"PRIx64"-%" > + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", > + node, start, end, memblk_nodeid[i], > + node_memblk_range[i].start, node_memblk_range[i].end); > +bad_srat(); > +return -EINVAL; > +} > + > +save_memblk: > +nd = &nodes[node]; > +if ( !node_test_and_set(node, memory_nodes_parsed) ) { > +nd->start = start; > +nd->end = end; > +} else { > +if ( start < nd->start ) > +nd->start = start; > +if ( nd->end < end ) > +nd->end = end; > +} > + > +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", > + node, start, end); > + > +node_memblk_range[num_node_memblks].start = start; > +node_memblk_range[num_node_memblks].end = end; > +memblk_nodeid[num_node_memblks] = node; > +num_node_memblks++; Is it possible to have non-contigous ranges of memory for a single NUMA node? Looking at the DT bindings and Linux implementation, it seems possible. Here, it seems that node_memblk_range/memblk_nodeid could handle it, but nodes couldn't.
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Julien, > -Original Message- > From: Julien Grall > Sent: 2021年8月26日 16:22 > To: Wei Chen ; xen-devel@lists.xenproject.org; > sstabell...@kernel.org > Cc: Bertrand Marquis > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > > > On 26/08/2021 07:35, Wei Chen wrote: > > Hi Julien, > > Hi Wei, > > >> -Original Message- > >> From: Julien Grall > >> Sent: 2021年8月25日 21:49 > >> To: Wei Chen ; xen-devel@lists.xenproject.org; > >> sstabell...@kernel.org; jbeul...@suse.com > >> Cc: Bertrand Marquis > >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > >> device tree memory node > >> > >> Hi Wei, > >> > >> On 11/08/2021 11:24, Wei Chen wrote: > >>> Memory blocks' NUMA ID information is stored in device tree's > >>> memory nodes as "numa-node-id". We need a new helper to parse > >>> and verify this ID from memory nodes. > >>> > >>> In order to support memory affinity in later use, the valid > >>> memory ranges and NUMA ID will be saved to tables. 
> >>> > >>> Signed-off-by: Wei Chen > >>> --- > >>>xen/arch/arm/numa_device_tree.c | 130 > > >>>1 file changed, 130 insertions(+) > >>> > >>> diff --git a/xen/arch/arm/numa_device_tree.c > >> b/xen/arch/arm/numa_device_tree.c > >>> index 37cc56acf3..bbe081dcd1 100644 > >>> --- a/xen/arch/arm/numa_device_tree.c > >>> +++ b/xen/arch/arm/numa_device_tree.c > >>> @@ -20,11 +20,13 @@ > >>>#include > >>>#include > >>>#include > >>> +#include > >>>#include > >>>#include > >>> > >>>s8 device_tree_numa = 0; > >>>static nodemask_t processor_nodes_parsed __initdata; > >>> +static nodemask_t memory_nodes_parsed __initdata; > >>> > >>>static int srat_disabled(void) > >>>{ > >>> @@ -55,6 +57,79 @@ static int __init > >> dtb_numa_processor_affinity_init(nodeid_t node) > >>>return 0; > >>>} > >>> > >>> +/* Callback for parsing of the memory regions affinity */ > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > >>> +paddr_t start, paddr_t size) > >>> +{ > >> > >> The implementation of this function is quite similar ot the ACPI > >> version. Can this be abstracted? > > > > In my draft, I had tried to merge ACPI and DTB versions in one > > function. I introduced a number of "if else" to distinguish ACPI > > from DTB, especially ACPI hotplug. The function seems very messy. > > Not enough benefits to make up for the mess, so I gave up. > > It think you can get away from distinguishing between ACPI and DT in > that helper: >* ma->flags & ACPI_SRAT_MEM_HOTPLUGGABLE could be replace by an > argument indicating whether the region is hotpluggable (this would > always be false for DT) >* Access to memblk_hotplug can be stubbed (in the future we may want > to consider memory hotplug even on Arm). > > Do you still have the "if else" version? If so can you post it? > I just tried to do that in draft process, because I was not satisfied with the changes, I haven't saved them as a patch. I think your suggestions are worth to try again, I will do it in next version. 
> Cheers, > > -- > Julien Grall
Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
On 26/08/2021 07:35, Wei Chen wrote: Hi Julien, Hi Wei, -Original Message- From: Julien Grall Sent: 2021年8月25日 21:49 To: Wei Chen ; xen-devel@lists.xenproject.org; sstabell...@kernel.org; jbeul...@suse.com Cc: Bertrand Marquis Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node Hi Wei, On 11/08/2021 11:24, Wei Chen wrote: Memory blocks' NUMA ID information is stored in device tree's memory nodes as "numa-node-id". We need a new helper to parse and verify this ID from memory nodes. In order to support memory affinity in later use, the valid memory ranges and NUMA ID will be saved to tables. Signed-off-by: Wei Chen --- xen/arch/arm/numa_device_tree.c | 130 1 file changed, 130 insertions(+) diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c index 37cc56acf3..bbe081dcd1 100644 --- a/xen/arch/arm/numa_device_tree.c +++ b/xen/arch/arm/numa_device_tree.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include s8 device_tree_numa = 0; static nodemask_t processor_nodes_parsed __initdata; +static nodemask_t memory_nodes_parsed __initdata; static int srat_disabled(void) { @@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t node) return 0; } +/* Callback for parsing of the memory regions affinity */ +static int __init dtb_numa_memory_affinity_init(nodeid_t node, +paddr_t start, paddr_t size) +{ The implementation of this function is quite similar ot the ACPI version. Can this be abstracted? In my draft, I had tried to merge ACPI and DTB versions in one function. I introduced a number of "if else" to distinguish ACPI from DTB, especially ACPI hotplug. The function seems very messy. Not enough benefits to make up for the mess, so I gave up. 
It think you can get away from distinguishing between ACPI and DT in that helper: * ma->flags & ACPI_SRAT_MEM_HOTPLUGGABLE could be replace by an argument indicating whether the region is hotpluggable (this would always be false for DT) * Access to memblk_hotplug can be stubbed (in the future we may want to consider memory hotplug even on Arm). Do you still have the "if else" version? If so can you post it? Cheers, -- Julien Grall
RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Julien, > -Original Message- > From: Julien Grall > Sent: 2021年8月25日 21:49 > To: Wei Chen ; xen-devel@lists.xenproject.org; > sstabell...@kernel.org; jbeul...@suse.com > Cc: Bertrand Marquis > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse > device tree memory node > > Hi Wei, > > On 11/08/2021 11:24, Wei Chen wrote: > > Memory blocks' NUMA ID information is stored in device tree's > > memory nodes as "numa-node-id". We need a new helper to parse > > and verify this ID from memory nodes. > > > > In order to support memory affinity in later use, the valid > > memory ranges and NUMA ID will be saved to tables. > > > > Signed-off-by: Wei Chen > > --- > > xen/arch/arm/numa_device_tree.c | 130 > > 1 file changed, 130 insertions(+) > > > > diff --git a/xen/arch/arm/numa_device_tree.c > b/xen/arch/arm/numa_device_tree.c > > index 37cc56acf3..bbe081dcd1 100644 > > --- a/xen/arch/arm/numa_device_tree.c > > +++ b/xen/arch/arm/numa_device_tree.c > > @@ -20,11 +20,13 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > s8 device_tree_numa = 0; > > static nodemask_t processor_nodes_parsed __initdata; > > +static nodemask_t memory_nodes_parsed __initdata; > > > > static int srat_disabled(void) > > { > > @@ -55,6 +57,79 @@ static int __init > dtb_numa_processor_affinity_init(nodeid_t node) > > return 0; > > } > > > > +/* Callback for parsing of the memory regions affinity */ > > +static int __init dtb_numa_memory_affinity_init(nodeid_t node, > > +paddr_t start, paddr_t size) > > +{ > > The implementation of this function is quite similar ot the ACPI > version. Can this be abstracted? In my draft, I had tried to merge ACPI and DTB versions in one function. I introduced a number of "if else" to distinguish ACPI from DTB, especially ACPI hotplug. The function seems very messy. Not enough benefits to make up for the mess, so I gave up. 
But, yes, maybe we can abstract some common code to functions, that can be reused in two functions. I would try it in next version. > > > +struct node *nd; > > +paddr_t end; > > +int i; > > + > > +if ( srat_disabled() ) > > +return -EINVAL; > > + > > +end = start + size; > > +if ( num_node_memblks >= NR_NODE_MEMBLKS ) > > +{ > > +dprintk(XENLOG_WARNING, > > +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); > > +bad_srat(); > > +return -EINVAL; > > +} > > + > > +/* It is fine to add this area to the nodes data it will be used > later */ > > +i = conflicting_memblks(start, end); > > +/* No conflicting memory block, we can save it for later usage */; > > +if ( i < 0 ) > > +goto save_memblk; > > + > > +if ( memblk_nodeid[i] == node ) { > > Xen coding style is using: > > if ( ... ) > { > > Note that I may not comment on all the occurents, so please check the > other places. > Ok. > > +/* > > + * Overlaps with other memblk in the same node, warning here. > > + * This memblk will be merged with conflicted memblk later. > > + */ > > +printk(XENLOG_WARNING > > + "DT: NUMA NODE %u (%"PRIx64 > > + "-%"PRIx64") overlaps with itself (%"PRIx64"- > %"PRIx64")\n", > > + node, start, end, > > + node_memblk_range[i].start, node_memblk_range[i].end); > > +} else { > > +/* > > + * Conflict with memblk in other node, this is an error. > > + * The NUMA information is invalid, NUMA will be turn off. > > + */ > > +printk(XENLOG_ERR > > + "DT: NUMA NODE %u (%"PRIx64"-%" > > + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", > > + node, start, end, memblk_nodeid[i], > > + node_memblk_range[i].start, node_memblk_range[i].end); > > +bad_srat(); > > +return -EINVAL; > > +} > > + > > +save_memblk: > > +nd = &nodes[node]; > > +if ( !node_test_and_set(node, memory_nodes_parsed) ) { > > +nd->start = start; > > +
Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Hi Wei, On 11/08/2021 11:24, Wei Chen wrote: Memory blocks' NUMA ID information is stored in device tree's memory nodes as "numa-node-id". We need a new helper to parse and verify this ID from memory nodes. In order to support memory affinity in later use, the valid memory ranges and NUMA ID will be saved to tables. Signed-off-by: Wei Chen --- xen/arch/arm/numa_device_tree.c | 130 1 file changed, 130 insertions(+) diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c index 37cc56acf3..bbe081dcd1 100644 --- a/xen/arch/arm/numa_device_tree.c +++ b/xen/arch/arm/numa_device_tree.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include s8 device_tree_numa = 0; static nodemask_t processor_nodes_parsed __initdata; +static nodemask_t memory_nodes_parsed __initdata; static int srat_disabled(void) { @@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t node) return 0; } +/* Callback for parsing of the memory regions affinity */ +static int __init dtb_numa_memory_affinity_init(nodeid_t node, +paddr_t start, paddr_t size) +{ The implementation of this function is quite similar ot the ACPI version. Can this be abstracted? +struct node *nd; +paddr_t end; +int i; + +if ( srat_disabled() ) +return -EINVAL; + +end = start + size; +if ( num_node_memblks >= NR_NODE_MEMBLKS ) +{ +dprintk(XENLOG_WARNING, +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); +bad_srat(); +return -EINVAL; +} + +/* It is fine to add this area to the nodes data it will be used later */ +i = conflicting_memblks(start, end); +/* No conflicting memory block, we can save it for later usage */; +if ( i < 0 ) +goto save_memblk; + +if ( memblk_nodeid[i] == node ) { Xen coding style is using: if ( ... ) { Note that I may not comment on all the occurents, so please check the other places. +/* + * Overlaps with other memblk in the same node, warning here. + * This memblk will be merged with conflicted memblk later. 
+ */ +printk(XENLOG_WARNING + "DT: NUMA NODE %u (%"PRIx64 + "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n", + node, start, end, + node_memblk_range[i].start, node_memblk_range[i].end); +} else { +/* + * Conflict with memblk in other node, this is an error. + * The NUMA information is invalid, NUMA will be turn off. + */ +printk(XENLOG_ERR + "DT: NUMA NODE %u (%"PRIx64"-%" + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", + node, start, end, memblk_nodeid[i], + node_memblk_range[i].start, node_memblk_range[i].end); +bad_srat(); +return -EINVAL; +} + +save_memblk: +nd = &nodes[node]; +if ( !node_test_and_set(node, memory_nodes_parsed) ) { +nd->start = start; +nd->end = end; +} else { +if ( start < nd->start ) +nd->start = start; +if ( nd->end < end ) +nd->end = end; +} + +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", + node, start, end); + +node_memblk_range[num_node_memblks].start = start; +node_memblk_range[num_node_memblks].end = end; +memblk_nodeid[num_node_memblks] = node; +num_node_memblks++; + +return 0; +} + /* Parse CPU NUMA node info */ int __init device_tree_parse_numa_cpu_node(const void *fdt, int node) { @@ -70,3 +145,58 @@ int __init device_tree_parse_numa_cpu_node(const void *fdt, int node) return dtb_numa_processor_affinity_init(nid); } + +/* Parse memory node NUMA info */ +int __init +device_tree_parse_numa_memory_node(const void *fdt, int node, +const char *name, uint32_t addr_cells, uint32_t size_cells) This is pretty much a copy of process_memory_node(). Can we consider to collect the NUMA ID from there? If not, can we at least abstract the code? 
+{ +uint32_t nid; +int ret = 0, len; +paddr_t addr, size; +const struct fdt_property *prop; +uint32_t idx, ranges; +const __be32 *addresses; + +nid = device_tree_get_u32(fdt, node, "numa-node-id", MAX_NUMNODES); +if ( nid >= MAX_NUMNODES ) +{ +printk(XENLOG_WARNING "Node id %u exceeds maximum value\n", nid); +return -EINVAL; +} + +prop = fdt_get_property(fdt, node, "reg", &len); +if ( !prop ) +{ +printk(XENLOG_WARNING + "fdt: node `%s': missing `reg' property\n", name); +return -EINVAL; +} + +addresses = (const __be32 *)prop->data; +ranges = len / (sizeof(__be32)* (addr_cells + size_cells)); +for ( idx = 0; idx < ranges; idx++ ) +{ +device_tree_get_reg(&addresses, addr_cells
[XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node
Memory blocks' NUMA ID information is stored in device tree's memory nodes as "numa-node-id". We need a new helper to parse and verify this ID from memory nodes. In order to support memory affinity in later use, the valid memory ranges and NUMA ID will be saved to tables. Signed-off-by: Wei Chen --- xen/arch/arm/numa_device_tree.c | 130 1 file changed, 130 insertions(+) diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c index 37cc56acf3..bbe081dcd1 100644 --- a/xen/arch/arm/numa_device_tree.c +++ b/xen/arch/arm/numa_device_tree.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include s8 device_tree_numa = 0; static nodemask_t processor_nodes_parsed __initdata; +static nodemask_t memory_nodes_parsed __initdata; static int srat_disabled(void) { @@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t node) return 0; } +/* Callback for parsing of the memory regions affinity */ +static int __init dtb_numa_memory_affinity_init(nodeid_t node, +paddr_t start, paddr_t size) +{ +struct node *nd; +paddr_t end; +int i; + +if ( srat_disabled() ) +return -EINVAL; + +end = start + size; +if ( num_node_memblks >= NR_NODE_MEMBLKS ) +{ +dprintk(XENLOG_WARNING, +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n"); +bad_srat(); +return -EINVAL; +} + +/* It is fine to add this area to the nodes data it will be used later */ +i = conflicting_memblks(start, end); +/* No conflicting memory block, we can save it for later usage */; +if ( i < 0 ) +goto save_memblk; + +if ( memblk_nodeid[i] == node ) { +/* + * Overlaps with other memblk in the same node, warning here. + * This memblk will be merged with conflicted memblk later. + */ +printk(XENLOG_WARNING + "DT: NUMA NODE %u (%"PRIx64 + "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n", + node, start, end, + node_memblk_range[i].start, node_memblk_range[i].end); +} else { +/* + * Conflict with memblk in other node, this is an error. 
+ * The NUMA information is invalid, NUMA will be turn off. + */ +printk(XENLOG_ERR + "DT: NUMA NODE %u (%"PRIx64"-%" + PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n", + node, start, end, memblk_nodeid[i], + node_memblk_range[i].start, node_memblk_range[i].end); +bad_srat(); +return -EINVAL; +} + +save_memblk: +nd = &nodes[node]; +if ( !node_test_and_set(node, memory_nodes_parsed) ) { +nd->start = start; +nd->end = end; +} else { +if ( start < nd->start ) +nd->start = start; +if ( nd->end < end ) +nd->end = end; +} + +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n", + node, start, end); + +node_memblk_range[num_node_memblks].start = start; +node_memblk_range[num_node_memblks].end = end; +memblk_nodeid[num_node_memblks] = node; +num_node_memblks++; + +return 0; +} + /* Parse CPU NUMA node info */ int __init device_tree_parse_numa_cpu_node(const void *fdt, int node) { @@ -70,3 +145,58 @@ int __init device_tree_parse_numa_cpu_node(const void *fdt, int node) return dtb_numa_processor_affinity_init(nid); } + +/* Parse memory node NUMA info */ +int __init +device_tree_parse_numa_memory_node(const void *fdt, int node, +const char *name, uint32_t addr_cells, uint32_t size_cells) +{ +uint32_t nid; +int ret = 0, len; +paddr_t addr, size; +const struct fdt_property *prop; +uint32_t idx, ranges; +const __be32 *addresses; + +nid = device_tree_get_u32(fdt, node, "numa-node-id", MAX_NUMNODES); +if ( nid >= MAX_NUMNODES ) +{ +printk(XENLOG_WARNING "Node id %u exceeds maximum value\n", nid); +return -EINVAL; +} + +prop = fdt_get_property(fdt, node, "reg", &len); +if ( !prop ) +{ +printk(XENLOG_WARNING + "fdt: node `%s': missing `reg' property\n", name); +return -EINVAL; +} + +addresses = (const __be32 *)prop->data; +ranges = len / (sizeof(__be32)* (addr_cells + size_cells)); +for ( idx = 0; idx < ranges; idx++ ) +{ +device_tree_get_reg(&addresses, addr_cells, size_cells, &addr, &size); +/* Skip zero size ranges */ +if ( !size ) +continue; + +ret = 
dtb_numa_memory_affinity_init(nid, addr, size); +if ( ret ) { +printk(XENLOG_WARNING + "NUMA: process range#%d addr = %lx size=%lx failed!\n", + idx, addr, size); +return -EINVAL; +} +} + +if ( idx == 0 ) +{ +printk(XENLOG_ERR +