RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-09-08 Thread Wei Chen
Hi Stefano,

> -Original Message-
> From: Stefano Stabellini 
> Sent: 2021年9月9日 6:32
> To: Wei Chen 
> Cc: Julien Grall ; Stefano Stabellini
> ; xen-devel@lists.xenproject.org; Bertrand Marquis
> ; Jan Beulich 
> Subject: RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> On Wed, 8 Sep 2021, Wei Chen wrote:
> > > > >>> @@ -55,6 +57,79 @@ static int __init
> > > > >> dtb_numa_processor_affinity_init(nodeid_t node)
> > > > >>>   return 0;
> > > > >>>   }
> > > > >>>
> > > > >>> +/* Callback for parsing of the memory regions affinity */
> > > > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> > > > >>> +paddr_t start, paddr_t size)
> > > > >>> +{
> > > > >>> +struct node *nd;
> > > > >>> +paddr_t end;
> > > > >>> +int i;
> > > > >>> +
> > > > >>> +if ( srat_disabled() )
> > > > >>> +return -EINVAL;
> > > > >>> +
> > > > >>> +end = start + size;
> > > > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> > > > >>> +{
> > > > >>> +dprintk(XENLOG_WARNING,
> > > > >>> +"Too many numa entry, try bigger
> NR_NODE_MEMBLKS
> > > \n");
> > > > >>> +bad_srat();
> > > > >>> +return -EINVAL;
> > > > >>> +}
> > > > >>> +
> > > > >>> +/* It is fine to add this area to the nodes data it will be
> > > used
> > > > >> later */
> > > > >>> +i = conflicting_memblks(start, end);
> > > > >>> +/* No conflicting memory block, we can save it for later
> usage
> > > */;
> > > > >>> +if ( i < 0 )
> > > > >>> +goto save_memblk;
> > > > >>> +
> > > > >>> +if ( memblk_nodeid[i] == node ) {
> > > > >>> +/*
> > > > >>> + * Overlaps with other memblk in the same node, warning
> > > here.
> > > > >>> + * This memblk will be merged with conflicted memblk
> later.
> > > > >>> + */
> > > > >>> +printk(XENLOG_WARNING
> > > > >>> +   "DT: NUMA NODE %u (%"PRIx64
> > > > >>> +   "-%"PRIx64") overlaps with itself (%"PRIx64"-
> > > > >> %"PRIx64")\n",
> > > > >>> +   node, start, end,
> > > > >>> +   node_memblk_range[i].start,
> > > node_memblk_range[i].end);
> > > > >>> +} else {
> > > > >>> +/*
> > > > >>> + * Conflict with memblk in other node, this is an error.
> > > > >>> + * The NUMA information is invalid, NUMA will be turn
> off.
> > > > >>> + */
> > > > >>> +printk(XENLOG_ERR
> > > > >>> +   "DT: NUMA NODE %u (%"PRIx64"-%"
> > > > >>> +   PRIx64") overlaps with NODE %u (%"PRIx64"-
> > > > %"PRIx64")\n",
> > > > >>> +   node, start, end, memblk_nodeid[i],
> > > > >>> +   node_memblk_range[i].start,
> > > node_memblk_range[i].end);
> > > > >>> +bad_srat();
> > > > >>> +return -EINVAL;
> > > > >>> +}
> > > > >>> +
> > > > >>> +save_memblk:
> > > > >>> +nd = &nodes[node];
> > > > >>> +if ( !node_test_and_set(node, memory_nodes_parsed) ) {
> > > > >>> +nd->start = start;
> > > > >>> +nd->end = end;
> > > > >>> +} else {
> > > > >>> +if ( start < nd->start )
> > > > >>> +nd->start = start;
> > > > >>> +if ( nd->end < end )
> > > > >>> +nd->end = end;

RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-09-08 Thread Stefano Stabellini
On Wed, 8 Sep 2021, Wei Chen wrote:
> > > >>> @@ -55,6 +57,79 @@ static int __init
> > > >> dtb_numa_processor_affinity_init(nodeid_t node)
> > > >>>   return 0;
> > > >>>   }
> > > >>>
> > > >>> +/* Callback for parsing of the memory regions affinity */
> > > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> > > >>> +paddr_t start, paddr_t size)
> > > >>> +{
> > > >>> +struct node *nd;
> > > >>> +paddr_t end;
> > > >>> +int i;
> > > >>> +
> > > >>> +if ( srat_disabled() )
> > > >>> +return -EINVAL;
> > > >>> +
> > > >>> +end = start + size;
> > > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> > > >>> +{
> > > >>> +dprintk(XENLOG_WARNING,
> > > >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS
> > \n");
> > > >>> +bad_srat();
> > > >>> +return -EINVAL;
> > > >>> +}
> > > >>> +
> > > >>> +/* It is fine to add this area to the nodes data it will be
> > used
> > > >> later */
> > > >>> +i = conflicting_memblks(start, end);
> > > >>> +/* No conflicting memory block, we can save it for later usage
> > */;
> > > >>> +if ( i < 0 )
> > > >>> +goto save_memblk;
> > > >>> +
> > > >>> +if ( memblk_nodeid[i] == node ) {
> > > >>> +/*
> > > >>> + * Overlaps with other memblk in the same node, warning
> > here.
> > > >>> + * This memblk will be merged with conflicted memblk later.
> > > >>> + */
> > > >>> +printk(XENLOG_WARNING
> > > >>> +   "DT: NUMA NODE %u (%"PRIx64
> > > >>> +   "-%"PRIx64") overlaps with itself (%"PRIx64"-
> > > >> %"PRIx64")\n",
> > > >>> +   node, start, end,
> > > >>> +   node_memblk_range[i].start,
> > node_memblk_range[i].end);
> > > >>> +} else {
> > > >>> +/*
> > > >>> + * Conflict with memblk in other node, this is an error.
> > > >>> + * The NUMA information is invalid, NUMA will be turn off.
> > > >>> + */
> > > >>> +printk(XENLOG_ERR
> > > >>> +   "DT: NUMA NODE %u (%"PRIx64"-%"
> > > >>> +   PRIx64") overlaps with NODE %u (%"PRIx64"-
> > > %"PRIx64")\n",
> > > >>> +   node, start, end, memblk_nodeid[i],
> > > >>> +   node_memblk_range[i].start,
> > node_memblk_range[i].end);
> > > >>> +bad_srat();
> > > >>> +return -EINVAL;
> > > >>> +}
> > > >>> +
> > > >>> +save_memblk:
> > > >>> +nd = &nodes[node];
> > > >>> +if ( !node_test_and_set(node, memory_nodes_parsed) ) {
> > > >>> +nd->start = start;
> > > >>> +nd->end = end;
> > > >>> +} else {
> > > >>> +if ( start < nd->start )
> > > >>> +nd->start = start;
> > > >>> +if ( nd->end < end )
> > > >>> +nd->end = end;
> > > >>> +}
> > > >>> +
> > > >>> +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
> > > >>> +   node, start, end);
> > > >>> +
> > > >>> +node_memblk_range[num_node_memblks].start = start;
> > > >>> +node_memblk_range[num_node_memblks].end = end;
> > > >>> +memblk_nodeid[num_node_memblks] = node;
> > > >>> +num_node_memblks++;
> > > >>
> > > >>
> > > >> Is it possible to have non-contiguous ranges of memory for a single
> > NUMA
> > > >> node?
> > > >>
> > > >> Looking at the DT bindings and Linux implementation, it seems
> > possible.
> > > >> Here, it seems that node_memblk_range/memblk_nodeid could handle it,
> > > >> but nodes couldn't.
> > > >
> > > > Yes, you're right. I copied this code for x86 ACPI NUMA. Does ACPI
> > allow
> > > > non-contiguous ranges of memory for a single NUMA node too?
> > >
> > > I couldn't find any restriction for ACPI. Although, I only briefly
> > > looked at the spec.
> > >
> > > > If yes, I think
> > > > this will affect x86 ACPI NUMA too. In next version, we plan to merge
> > > > dtb_numa_memory_affinity_init and acpi_numa_memory_affinity_init into
> > a
> > > > neutral function. So we can fix them at the same time.
> > > >
> > > > If not, maybe we have to keep the diversity for dtb and ACPI here.
> > >
> > > I am not entirely sure what you mean. Are you saying if ACPI doesn't
> > > allow non-contiguous ranges of memory, then we should keep the
> > > implementation separated?
> > >
> > > If so, then I disagree with that. It is fine to have code that supports
> > > more than what a firmware table supports. The main benefit is less code
> > > and therefore less long term maintenance (with the current solution we
> > > would need to check both the ACPI and DT implementation if there is a
> > > bug in one).
> > >
> > 
> > Yes, I agree.
> > 
> 
> I am looking for a way to address this comment. The current "nodes"
> bookkeeping does not consider the situation where memory addresses of
> different NUMA nodes can be interleaved.
> 
> This code exists in the x86 NUMA implementation. I think it may be
> based on an early version of the Linux x86

RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-09-08 Thread Wei Chen
Hi Julien, Stefano, Jan

> -Original Message-
> From: Xen-devel  On Behalf Of Wei
> Chen
> Sent: 2021年8月28日 21:58
> To: Julien Grall ; Stefano Stabellini
> 
> Cc: xen-devel@lists.xenproject.org; Bertrand Marquis
> ; Jan Beulich 
> Subject: RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> Hi Julien,
> 
> > -Original Message-
> > From: Julien Grall 
> > Sent: 2021年8月28日 18:34
> > To: Wei Chen ; Stefano Stabellini
> > 
> > Cc: xen-devel@lists.xenproject.org; Bertrand Marquis
> > ; Jan Beulich 
> > Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> > device tree memory node
> >
> > Hi Wei,
> >
> > On 28/08/2021 04:56, Wei Chen wrote:
> > >> -Original Message-
> > >> From: Stefano Stabellini 
> > >> Sent: 2021年8月28日 9:06
> > >> To: Wei Chen 
> > >> Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org;
> > jul...@xen.org;
> > >> jbeul...@suse.com; Bertrand Marquis 
> > >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to
> parse
> > >> device tree memory node
> > >>
> > >> On Wed, 11 Aug 2021, Wei Chen wrote:
> > >>> Memory blocks' NUMA ID information is stored in device tree's
> > >>> memory nodes as "numa-node-id". We need a new helper to parse
> > >>> and verify this ID from memory nodes.
> > >>>
> > >>> In order to support memory affinity in later use, the valid
> > >>> memory ranges and NUMA ID will be saved to tables.
> > >>>
> > >>> Signed-off-by: Wei Chen 
> > >>> ---
> > >>>   xen/arch/arm/numa_device_tree.c | 130
> > 
> > >>>   1 file changed, 130 insertions(+)
> > >>>
> > >>> diff --git a/xen/arch/arm/numa_device_tree.c
> > >> b/xen/arch/arm/numa_device_tree.c
> > >>> index 37cc56acf3..bbe081dcd1 100644
> > >>> --- a/xen/arch/arm/numa_device_tree.c
> > >>> +++ b/xen/arch/arm/numa_device_tree.c
> > >>> @@ -20,11 +20,13 @@
> > >>>   #include 
> > >>>   #include 
> > >>>   #include 
> > >>> +#include 
> > >>>   #include 
> > >>>   #include 
> > >>>
> > >>>   s8 device_tree_numa = 0;
> > >>>   static nodemask_t processor_nodes_parsed __initdata;
> > >>> +static nodemask_t memory_nodes_parsed __initdata;
> > >>>
> > >>>   static int srat_disabled(void)
> > >>>   {
> > >>> @@ -55,6 +57,79 @@ static int __init
> > >> dtb_numa_processor_affinity_init(nodeid_t node)
> > >>>   return 0;
> > >>>   }
> > >>>
> > >>> +/* Callback for parsing of the memory regions affinity */
> > >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> > >>> +paddr_t start, paddr_t size)
> > >>> +{
> > >>> +struct node *nd;
> > >>> +paddr_t end;
> > >>> +int i;
> > >>> +
> > >>> +if ( srat_disabled() )
> > >>> +return -EINVAL;
> > >>> +
> > >>> +end = start + size;
> > >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> > >>> +{
> > >>> +dprintk(XENLOG_WARNING,
> > >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS
> \n");
> > >>> +bad_srat();
> > >>> +return -EINVAL;
> > >>> +}
> > >>> +
> > >>> +/* It is fine to add this area to the nodes data it will be
> used
> > >> later */
> > >>> +i = conflicting_memblks(start, end);
> > >>> +/* No conflicting memory block, we can save it for later usage
> */;
> > >>> +if ( i < 0 )
> > >>> +goto save_memblk;
> > >>> +
> > >>> +if ( memblk_nodeid[i] == node ) {
> > >>> +/*
> > >>> + * Overlaps with other memblk in the same node, warning
> here.
> > >>> + * This memblk will be merged with conflicted memblk later.
> > >>> + */
> > >>> +printk(XENLOG_W

RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-28 Thread Wei Chen
Hi Julien,

> -Original Message-
> From: Julien Grall 
> Sent: 2021年8月28日 18:34
> To: Wei Chen ; Stefano Stabellini
> 
> Cc: xen-devel@lists.xenproject.org; Bertrand Marquis
> ; Jan Beulich 
> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> Hi Wei,
> 
> On 28/08/2021 04:56, Wei Chen wrote:
> >> -Original Message-
> >> From: Stefano Stabellini 
> >> Sent: 2021年8月28日 9:06
> >> To: Wei Chen 
> >> Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org;
> jul...@xen.org;
> >> jbeul...@suse.com; Bertrand Marquis 
> >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> >> device tree memory node
> >>
> >> On Wed, 11 Aug 2021, Wei Chen wrote:
> >>> Memory blocks' NUMA ID information is stored in device tree's
> >>> memory nodes as "numa-node-id". We need a new helper to parse
> >>> and verify this ID from memory nodes.
> >>>
> >>> In order to support memory affinity in later use, the valid
> >>> memory ranges and NUMA ID will be saved to tables.
> >>>
> >>> Signed-off-by: Wei Chen 
> >>> ---
> >>>   xen/arch/arm/numa_device_tree.c | 130
> 
> >>>   1 file changed, 130 insertions(+)
> >>>
> >>> diff --git a/xen/arch/arm/numa_device_tree.c
> >> b/xen/arch/arm/numa_device_tree.c
> >>> index 37cc56acf3..bbe081dcd1 100644
> >>> --- a/xen/arch/arm/numa_device_tree.c
> >>> +++ b/xen/arch/arm/numa_device_tree.c
> >>> @@ -20,11 +20,13 @@
> >>>   #include 
> >>>   #include 
> >>>   #include 
> >>> +#include 
> >>>   #include 
> >>>   #include 
> >>>
> >>>   s8 device_tree_numa = 0;
> >>>   static nodemask_t processor_nodes_parsed __initdata;
> >>> +static nodemask_t memory_nodes_parsed __initdata;
> >>>
> >>>   static int srat_disabled(void)
> >>>   {
> >>> @@ -55,6 +57,79 @@ static int __init
> >> dtb_numa_processor_affinity_init(nodeid_t node)
> >>>   return 0;
> >>>   }
> >>>
> >>> +/* Callback for parsing of the memory regions affinity */
> >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> >>> +paddr_t start, paddr_t size)
> >>> +{
> >>> +struct node *nd;
> >>> +paddr_t end;
> >>> +int i;
> >>> +
> >>> +if ( srat_disabled() )
> >>> +return -EINVAL;
> >>> +
> >>> +end = start + size;
> >>> +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> >>> +{
> >>> +dprintk(XENLOG_WARNING,
> >>> +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> >>> +bad_srat();
> >>> +return -EINVAL;
> >>> +}
> >>> +
> >>> +/* It is fine to add this area to the nodes data it will be used
> >> later */
> >>> +i = conflicting_memblks(start, end);
> >>> +/* No conflicting memory block, we can save it for later usage */;
> >>> +if ( i < 0 )
> >>> +goto save_memblk;
> >>> +
> >>> +if ( memblk_nodeid[i] == node ) {
> >>> +/*
> >>> + * Overlaps with other memblk in the same node, warning here.
> >>> + * This memblk will be merged with conflicted memblk later.
> >>> + */
> >>> +printk(XENLOG_WARNING
> >>> +   "DT: NUMA NODE %u (%"PRIx64
> >>> +   "-%"PRIx64") overlaps with itself (%"PRIx64"-
> >> %"PRIx64")\n",
> >>> +   node, start, end,
> >>> +   node_memblk_range[i].start, node_memblk_range[i].end);
> >>> +} else {
> >>> +/*
> >>> + * Conflict with memblk in other node, this is an error.
> >>> + * The NUMA information is invalid, NUMA will be turn off.
> >>> + */
> >>> +printk(XENLOG_ERR
> >>> +   "DT: NUMA NODE %u (%"PRIx64"-%"
> >>> +   PRIx64") overlaps with NODE %u (%"PRIx64"-
> %"PRIx64"

Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-28 Thread Julien Grall

Hi Wei,

On 28/08/2021 04:56, Wei Chen wrote:

-Original Message-
From: Stefano Stabellini 
Sent: 2021年8月28日 9:06
To: Wei Chen 
Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; jul...@xen.org;
jbeul...@suse.com; Bertrand Marquis 
Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
device tree memory node

On Wed, 11 Aug 2021, Wei Chen wrote:

Memory blocks' NUMA ID information is stored in device tree's
memory nodes as "numa-node-id". We need a new helper to parse
and verify this ID from memory nodes.

In order to support memory affinity in later use, the valid
memory ranges and NUMA ID will be saved to tables.

Signed-off-by: Wei Chen 
---
  xen/arch/arm/numa_device_tree.c | 130 
  1 file changed, 130 insertions(+)

diff --git a/xen/arch/arm/numa_device_tree.c

b/xen/arch/arm/numa_device_tree.c

index 37cc56acf3..bbe081dcd1 100644
--- a/xen/arch/arm/numa_device_tree.c
+++ b/xen/arch/arm/numa_device_tree.c
@@ -20,11 +20,13 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 

  s8 device_tree_numa = 0;
  static nodemask_t processor_nodes_parsed __initdata;
+static nodemask_t memory_nodes_parsed __initdata;

  static int srat_disabled(void)
  {
@@ -55,6 +57,79 @@ static int __init

dtb_numa_processor_affinity_init(nodeid_t node)

  return 0;
  }

+/* Callback for parsing of the memory regions affinity */
+static int __init dtb_numa_memory_affinity_init(nodeid_t node,
+paddr_t start, paddr_t size)
+{
+struct node *nd;
+paddr_t end;
+int i;
+
+if ( srat_disabled() )
+return -EINVAL;
+
+end = start + size;
+if ( num_node_memblks >= NR_NODE_MEMBLKS )
+{
+dprintk(XENLOG_WARNING,
+"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
+bad_srat();
+return -EINVAL;
+}
+
+/* It is fine to add this area to the nodes data it will be used

later */

+i = conflicting_memblks(start, end);
+/* No conflicting memory block, we can save it for later usage */;
+if ( i < 0 )
+goto save_memblk;
+
+if ( memblk_nodeid[i] == node ) {
+/*
+ * Overlaps with other memblk in the same node, warning here.
+ * This memblk will be merged with conflicted memblk later.
+ */
+printk(XENLOG_WARNING
+   "DT: NUMA NODE %u (%"PRIx64
+   "-%"PRIx64") overlaps with itself (%"PRIx64"-

%"PRIx64")\n",

+   node, start, end,
+   node_memblk_range[i].start, node_memblk_range[i].end);
+} else {
+/*
+ * Conflict with memblk in other node, this is an error.
+ * The NUMA information is invalid, NUMA will be turn off.
+ */
+printk(XENLOG_ERR
+   "DT: NUMA NODE %u (%"PRIx64"-%"
+   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
+   node, start, end, memblk_nodeid[i],
+   node_memblk_range[i].start, node_memblk_range[i].end);
+bad_srat();
+return -EINVAL;
+}
+
+save_memblk:
+nd = &nodes[node];
+if ( !node_test_and_set(node, memory_nodes_parsed) ) {
+nd->start = start;
+nd->end = end;
+} else {
+if ( start < nd->start )
+nd->start = start;
+if ( nd->end < end )
+nd->end = end;
+}
+
+printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
+   node, start, end);
+
+node_memblk_range[num_node_memblks].start = start;
+node_memblk_range[num_node_memblks].end = end;
+memblk_nodeid[num_node_memblks] = node;
+num_node_memblks++;



Is it possible to have non-contiguous ranges of memory for a single NUMA
node?

Looking at the DT bindings and Linux implementation, it seems possible.
Here, it seems that node_memblk_range/memblk_nodeid could handle it,
but nodes couldn't.


Yes, you're right. I copied this code from the x86 ACPI NUMA
implementation. Does ACPI allow non-contiguous ranges of memory for a
single NUMA node too?


I couldn't find any restriction for ACPI. Although, I only briefly 
looked at the spec.



If yes, I think this will affect x86 ACPI NUMA too. In the next
version, we plan to merge dtb_numa_memory_affinity_init and
acpi_numa_memory_affinity_init into a firmware-neutral function, so we
can fix them at the same time.

If not, maybe we have to keep separate implementations for DT and ACPI
here.


I am not entirely sure what you mean. Are you saying if ACPI doesn't 
allow non-contiguous ranges of memory, then we should keep the 
implementation separated?


If so, then I disagree with that. It is fine to have code that supports 
more than what a firmware table supports. The main benefit is less code 
and therefore less long term maintenance (with the current solution we 
would need to check both the ACPI and DT implementation if there is a 
bug in one).


Cheers,

--
Julien Grall



RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-27 Thread Wei Chen
Hi Stefano,

> -Original Message-
> From: Stefano Stabellini 
> Sent: 2021年8月28日 9:06
> To: Wei Chen 
> Cc: xen-devel@lists.xenproject.org; sstabell...@kernel.org; jul...@xen.org;
> jbeul...@suse.com; Bertrand Marquis 
> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> On Wed, 11 Aug 2021, Wei Chen wrote:
> > Memory blocks' NUMA ID information is stored in device tree's
> > memory nodes as "numa-node-id". We need a new helper to parse
> > and verify this ID from memory nodes.
> >
> > In order to support memory affinity in later use, the valid
> > memory ranges and NUMA ID will be saved to tables.
> >
> > Signed-off-by: Wei Chen 
> > ---
> >  xen/arch/arm/numa_device_tree.c | 130 
> >  1 file changed, 130 insertions(+)
> >
> > diff --git a/xen/arch/arm/numa_device_tree.c
> b/xen/arch/arm/numa_device_tree.c
> > index 37cc56acf3..bbe081dcd1 100644
> > --- a/xen/arch/arm/numa_device_tree.c
> > +++ b/xen/arch/arm/numa_device_tree.c
> > @@ -20,11 +20,13 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> >  s8 device_tree_numa = 0;
> >  static nodemask_t processor_nodes_parsed __initdata;
> > +static nodemask_t memory_nodes_parsed __initdata;
> >
> >  static int srat_disabled(void)
> >  {
> > @@ -55,6 +57,79 @@ static int __init
> dtb_numa_processor_affinity_init(nodeid_t node)
> >  return 0;
> >  }
> >
> > +/* Callback for parsing of the memory regions affinity */
> > +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> > +paddr_t start, paddr_t size)
> > +{
> > +struct node *nd;
> > +paddr_t end;
> > +int i;
> > +
> > +if ( srat_disabled() )
> > +return -EINVAL;
> > +
> > +end = start + size;
> > +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> > +{
> > +dprintk(XENLOG_WARNING,
> > +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> > +bad_srat();
> > +return -EINVAL;
> > +}
> > +
> > +/* It is fine to add this area to the nodes data it will be used
> later */
> > +i = conflicting_memblks(start, end);
> > +/* No conflicting memory block, we can save it for later usage */;
> > +if ( i < 0 )
> > +goto save_memblk;
> > +
> > +if ( memblk_nodeid[i] == node ) {
> > +/*
> > + * Overlaps with other memblk in the same node, warning here.
> > + * This memblk will be merged with conflicted memblk later.
> > + */
> > +printk(XENLOG_WARNING
> > +   "DT: NUMA NODE %u (%"PRIx64
> > +   "-%"PRIx64") overlaps with itself (%"PRIx64"-
> %"PRIx64")\n",
> > +   node, start, end,
> > +   node_memblk_range[i].start, node_memblk_range[i].end);
> > +} else {
> > +/*
> > + * Conflict with memblk in other node, this is an error.
> > + * The NUMA information is invalid, NUMA will be turn off.
> > + */
> > +printk(XENLOG_ERR
> > +   "DT: NUMA NODE %u (%"PRIx64"-%"
> > +   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
> > +   node, start, end, memblk_nodeid[i],
> > +   node_memblk_range[i].start, node_memblk_range[i].end);
> > +bad_srat();
> > +return -EINVAL;
> > +}
> > +
> > +save_memblk:
> > +nd = &nodes[node];
> > +if ( !node_test_and_set(node, memory_nodes_parsed) ) {
> > +nd->start = start;
> > +nd->end = end;
> > +} else {
> > +if ( start < nd->start )
> > +nd->start = start;
> > +if ( nd->end < end )
> > +nd->end = end;
> > +}
> > +
> > +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
> > +   node, start, end);
> > +
> > +node_memblk_range[num_node_memblks].start = start;
> > +node_memblk_range[num_node_memblks].end = end;
> > +memblk_nodeid[num_node_memblks] = node;
> > +num_node_memblks++;
> 
> 
> Is it possible to have non-contiguous ranges of memory for a single NUMA
> node?
> 
> Looking at the DT bindings and Linux implementation, it seems possible.
> Here, it seems that node_memblk_range/memblk_nodeid could handle it,
> but nodes couldn't.

Yes, you're right. I copied this code from the x86 ACPI NUMA
implementation. Does ACPI allow non-contiguous ranges of memory for a
single NUMA node too? If yes, I think this will affect x86 ACPI NUMA
too. In the next version, we plan to merge dtb_numa_memory_affinity_init
and acpi_numa_memory_affinity_init into a firmware-neutral function, so
we can fix them at the same time.

If not, maybe we have to keep separate implementations for DT and ACPI
here. Anyway, thanks for pointing this out; I will look into the latest
Linux implementation.

Cheers,
Wei Chen




Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-27 Thread Stefano Stabellini
On Wed, 11 Aug 2021, Wei Chen wrote:
> Memory blocks' NUMA ID information is stored in device tree's
> memory nodes as "numa-node-id". We need a new helper to parse
> and verify this ID from memory nodes.
> 
> In order to support memory affinity in later use, the valid
> memory ranges and NUMA ID will be saved to tables.
> 
> Signed-off-by: Wei Chen 
> ---
>  xen/arch/arm/numa_device_tree.c | 130 
>  1 file changed, 130 insertions(+)
> 
> diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
> index 37cc56acf3..bbe081dcd1 100644
> --- a/xen/arch/arm/numa_device_tree.c
> +++ b/xen/arch/arm/numa_device_tree.c
> @@ -20,11 +20,13 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
>  s8 device_tree_numa = 0;
>  static nodemask_t processor_nodes_parsed __initdata;
> +static nodemask_t memory_nodes_parsed __initdata;
>  
>  static int srat_disabled(void)
>  {
> @@ -55,6 +57,79 @@ static int __init 
> dtb_numa_processor_affinity_init(nodeid_t node)
>  return 0;
>  }
>  
> +/* Callback for parsing of the memory regions affinity */
> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> +paddr_t start, paddr_t size)
> +{
> +struct node *nd;
> +paddr_t end;
> +int i;
> +
> +if ( srat_disabled() )
> +return -EINVAL;
> +
> +end = start + size;
> +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> +{
> +dprintk(XENLOG_WARNING,
> +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> +bad_srat();
> +return -EINVAL;
> +}
> +
> +/* It is fine to add this area to the nodes data it will be used later */
> +i = conflicting_memblks(start, end);
> +/* No conflicting memory block, we can save it for later usage */;
> +if ( i < 0 )
> +goto save_memblk;
> +
> +if ( memblk_nodeid[i] == node ) {
> +/*
> + * Overlaps with other memblk in the same node, warning here.
> + * This memblk will be merged with conflicted memblk later.
> + */
> +printk(XENLOG_WARNING
> +   "DT: NUMA NODE %u (%"PRIx64
> +   "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n",
> +   node, start, end,
> +   node_memblk_range[i].start, node_memblk_range[i].end);
> +} else {
> +/*
> + * Conflict with memblk in other node, this is an error.
> + * The NUMA information is invalid, NUMA will be turn off.
> + */
> +printk(XENLOG_ERR
> +   "DT: NUMA NODE %u (%"PRIx64"-%"
> +   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
> +   node, start, end, memblk_nodeid[i],
> +   node_memblk_range[i].start, node_memblk_range[i].end);
> +bad_srat();
> +return -EINVAL;
> +}
> +
> +save_memblk:
> +nd = &nodes[node];
> +if ( !node_test_and_set(node, memory_nodes_parsed) ) {
> +nd->start = start;
> +nd->end = end;
> +} else {
> +if ( start < nd->start )
> +nd->start = start;
> +if ( nd->end < end )
> +nd->end = end;
> +}
> +
> +printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
> +   node, start, end);
> +
> +node_memblk_range[num_node_memblks].start = start;
> +node_memblk_range[num_node_memblks].end = end;
> +memblk_nodeid[num_node_memblks] = node;
> +num_node_memblks++;


Is it possible to have non-contiguous ranges of memory for a single NUMA
node?

Looking at the DT bindings and Linux implementation, it seems possible.
Here, it seems that node_memblk_range/memblk_nodeid could handle it,
but nodes couldn't.
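
To make the concern concrete, here is a small standalone sketch. The
names are simplified stand-ins for node_memblk_range[]/memblk_nodeid[]
and nodes[].start/end, not the actual Xen structures; it only shows how
a single per-node start/end span gets widened until it also covers
another node's memory, while the per-block arrays keep the two node-0
blocks apart:

/*
 * Illustration only: node 0 owns two non-contiguous blocks with
 * node 1's memory in between.
 */
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };

int main(void)
{
    /* Per-block bookkeeping: non-contiguous ranges are no problem. */
    const struct range memblk[] = {
        { 0x000000000ULL, 0x040000000ULL },  /* node 0, first GiB  */
        { 0x040000000ULL, 0x080000000ULL },  /* node 1, second GiB */
        { 0x080000000ULL, 0x0c0000000ULL },  /* node 0, third GiB  */
    };
    const unsigned int memblk_node[] = { 0, 1, 0 };

    /* Per-node single span, widened on each new block (like nodes[]). */
    struct range span[2] = { { UINT64_MAX, 0 }, { UINT64_MAX, 0 } };
    unsigned int i;

    for ( i = 0; i < 3; i++ )
    {
        unsigned int n = memblk_node[i];

        if ( memblk[i].start < span[n].start )
            span[n].start = memblk[i].start;
        if ( memblk[i].end > span[n].end )
            span[n].end = memblk[i].end;
    }

    /* Node 0's span is now 0x0-0xc0000000, i.e. it also covers node 1. */
    for ( i = 0; i < 2; i++ )
        printf("node %u span: %#llx-%#llx\n", i,
               (unsigned long long)span[i].start,
               (unsigned long long)span[i].end);

    return 0;
}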



RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-26 Thread Wei Chen
Hi Julien,

> -Original Message-
> From: Julien Grall 
> Sent: 2021年8月26日 16:22
> To: Wei Chen ; xen-devel@lists.xenproject.org;
> sstabell...@kernel.org
> Cc: Bertrand Marquis 
> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> 
> 
> On 26/08/2021 07:35, Wei Chen wrote:
> > Hi Julien,
> 
> Hi Wei,
> 
> >> -Original Message-
> >> From: Julien Grall 
> >> Sent: 2021年8月25日 21:49
> >> To: Wei Chen ; xen-devel@lists.xenproject.org;
> >> sstabell...@kernel.org; jbeul...@suse.com
> >> Cc: Bertrand Marquis 
> >> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> >> device tree memory node
> >>
> >> Hi Wei,
> >>
> >> On 11/08/2021 11:24, Wei Chen wrote:
> >>> Memory blocks' NUMA ID information is stored in device tree's
> >>> memory nodes as "numa-node-id". We need a new helper to parse
> >>> and verify this ID from memory nodes.
> >>>
> >>> In order to support memory affinity in later use, the valid
> >>> memory ranges and NUMA ID will be saved to tables.
> >>>
> >>> Signed-off-by: Wei Chen 
> >>> ---
> >>>xen/arch/arm/numa_device_tree.c | 130
> 
> >>>1 file changed, 130 insertions(+)
> >>>
> >>> diff --git a/xen/arch/arm/numa_device_tree.c
> >> b/xen/arch/arm/numa_device_tree.c
> >>> index 37cc56acf3..bbe081dcd1 100644
> >>> --- a/xen/arch/arm/numa_device_tree.c
> >>> +++ b/xen/arch/arm/numa_device_tree.c
> >>> @@ -20,11 +20,13 @@
> >>>#include 
> >>>#include 
> >>>#include 
> >>> +#include 
> >>>#include 
> >>>#include 
> >>>
> >>>s8 device_tree_numa = 0;
> >>>static nodemask_t processor_nodes_parsed __initdata;
> >>> +static nodemask_t memory_nodes_parsed __initdata;
> >>>
> >>>static int srat_disabled(void)
> >>>{
> >>> @@ -55,6 +57,79 @@ static int __init
> >> dtb_numa_processor_affinity_init(nodeid_t node)
> >>>return 0;
> >>>}
> >>>
> >>> +/* Callback for parsing of the memory regions affinity */
> >>> +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> >>> +paddr_t start, paddr_t size)
> >>> +{
> >>
> >> The implementation of this function is quite similar to the ACPI
> >> version. Can this be abstracted?
> >
> > In my draft, I had tried to merge ACPI and DTB versions in one
> > function. I introduced a number of "if else" to distinguish ACPI
> > from DTB, especially ACPI hotplug. The function seems very messy.
> > Not enough benefits to make up for the mess, so I gave up.
> 
> I think you can get away from distinguishing between ACPI and DT in
> that helper:
>* ma->flags & ACPI_SRAT_MEM_HOTPLUGGABLE could be replaced by an
> argument indicating whether the region is hotpluggable (this would
> always be false for DT)
>* Access to memblk_hotplug can be stubbed (in the future we may want
> to consider memory hotplug even on Arm).
> 
> Do you still have the "if else" version? If so can you post it?
> 

I only tried that during the drafting process; because I was not
satisfied with the changes, I did not save them as a patch.

I think your suggestions are worth trying again; I will do that in the
next version.


> Cheers,
> 
> --
> Julien Grall


Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-26 Thread Julien Grall




On 26/08/2021 07:35, Wei Chen wrote:

Hi Julien,


Hi Wei,


-Original Message-
From: Julien Grall 
Sent: 2021年8月25日 21:49
To: Wei Chen ; xen-devel@lists.xenproject.org;
sstabell...@kernel.org; jbeul...@suse.com
Cc: Bertrand Marquis 
Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
device tree memory node

Hi Wei,

On 11/08/2021 11:24, Wei Chen wrote:

Memory blocks' NUMA ID information is stored in device tree's
memory nodes as "numa-node-id". We need a new helper to parse
and verify this ID from memory nodes.

In order to support memory affinity in later use, the valid
memory ranges and NUMA ID will be saved to tables.

Signed-off-by: Wei Chen 
---
   xen/arch/arm/numa_device_tree.c | 130 
   1 file changed, 130 insertions(+)

diff --git a/xen/arch/arm/numa_device_tree.c

b/xen/arch/arm/numa_device_tree.c

index 37cc56acf3..bbe081dcd1 100644
--- a/xen/arch/arm/numa_device_tree.c
+++ b/xen/arch/arm/numa_device_tree.c
@@ -20,11 +20,13 @@
   #include 
   #include 
   #include 
+#include 
   #include 
   #include 

   s8 device_tree_numa = 0;
   static nodemask_t processor_nodes_parsed __initdata;
+static nodemask_t memory_nodes_parsed __initdata;

   static int srat_disabled(void)
   {
@@ -55,6 +57,79 @@ static int __init

dtb_numa_processor_affinity_init(nodeid_t node)

   return 0;
   }

+/* Callback for parsing of the memory regions affinity */
+static int __init dtb_numa_memory_affinity_init(nodeid_t node,
+paddr_t start, paddr_t size)
+{


The implementation of this function is quite similar to the ACPI
version. Can this be abstracted?


In my draft, I had tried to merge the ACPI and DTB versions into one
function. I introduced a number of "if else" branches to distinguish
ACPI from DTB, especially for ACPI hotplug. The function became very
messy, and the benefits were not enough to make up for the mess, so I
gave up.


I think you can get away from distinguishing between ACPI and DT in
that helper:
  * ma->flags & ACPI_SRAT_MEM_HOTPLUGGABLE could be replaced by an
argument indicating whether the region is hotpluggable (this would 
always be false for DT)
  * Access to memblk_hotplug can be stubbed (in the future we may want 
to consider memory hotplug even on Arm).
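
For illustration, a rough sketch of what such a firmware-neutral helper
could look like under these assumptions. The name and the extra bool
parameter are made up, the body reuses only symbols already present in
the patch, and the overlap handling is elided:

static int __init numa_memory_affinity_init(nodeid_t node, paddr_t start,
                                            paddr_t size, bool hotpluggable)
{
    paddr_t end = start + size;

    if ( srat_disabled() )
        return -EINVAL;

    if ( num_node_memblks >= NR_NODE_MEMBLKS )
    {
        bad_srat();
        return -EINVAL;
    }

    /* ... same conflict checks and nodes[] update as in the patch ... */

    node_memblk_range[num_node_memblks].start = start;
    node_memblk_range[num_node_memblks].end = end;
    memblk_nodeid[num_node_memblks] = node;

    /*
     * ACPI callers pass the SRAT hotplug flag, DT callers always pass
     * false; recording it (memblk_hotplug on x86) can be stubbed out
     * where memory hotplug is not supported.
     */
    if ( hotpluggable )
    {
        /* Record hotplug state here (stub). */
    }

    num_node_memblks++;

    return 0;
}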


Do you still have the "if else" version? If so can you post it?

Cheers,

--
Julien Grall



RE: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-25 Thread Wei Chen
Hi Julien,

> -Original Message-
> From: Julien Grall 
> Sent: 2021年8月25日 21:49
> To: Wei Chen ; xen-devel@lists.xenproject.org;
> sstabell...@kernel.org; jbeul...@suse.com
> Cc: Bertrand Marquis 
> Subject: Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse
> device tree memory node
> 
> Hi Wei,
> 
> On 11/08/2021 11:24, Wei Chen wrote:
> > Memory blocks' NUMA ID information is stored in device tree's
> > memory nodes as "numa-node-id". We need a new helper to parse
> > and verify this ID from memory nodes.
> >
> > In order to support memory affinity in later use, the valid
> > memory ranges and NUMA ID will be saved to tables.
> >
> > Signed-off-by: Wei Chen 
> > ---
> >   xen/arch/arm/numa_device_tree.c | 130 
> >   1 file changed, 130 insertions(+)
> >
> > diff --git a/xen/arch/arm/numa_device_tree.c
> b/xen/arch/arm/numa_device_tree.c
> > index 37cc56acf3..bbe081dcd1 100644
> > --- a/xen/arch/arm/numa_device_tree.c
> > +++ b/xen/arch/arm/numa_device_tree.c
> > @@ -20,11 +20,13 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >   #include 
> >   #include 
> >
> >   s8 device_tree_numa = 0;
> >   static nodemask_t processor_nodes_parsed __initdata;
> > +static nodemask_t memory_nodes_parsed __initdata;
> >
> >   static int srat_disabled(void)
> >   {
> > @@ -55,6 +57,79 @@ static int __init
> dtb_numa_processor_affinity_init(nodeid_t node)
> >   return 0;
> >   }
> >
> > +/* Callback for parsing of the memory regions affinity */
> > +static int __init dtb_numa_memory_affinity_init(nodeid_t node,
> > +paddr_t start, paddr_t size)
> > +{
> 
> The implementation of this function is quite similar to the ACPI
> version. Can this be abstracted?

In my draft, I had tried to merge the ACPI and DTB versions into one
function. I introduced a number of "if else" branches to distinguish
ACPI from DTB, especially for ACPI hotplug. The function became very
messy, and the benefits were not enough to make up for the mess, so I
gave up.

But yes, maybe we can abstract some common code into helpers that can
be reused by both functions. I will try that in the next version.
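
For illustration, one piece of common code that could be factored out of
both paths is the overlap check. A rough sketch (the helper name is made
up; it only uses symbols that already exist in the patch under review):

static int __init numa_check_memblk_overlap(nodeid_t node, paddr_t start,
                                            paddr_t end)
{
    int i = conflicting_memblks(start, end);

    /* No conflicting memory block. */
    if ( i < 0 )
        return 0;

    if ( memblk_nodeid[i] == node )
    {
        /* Same node: only warn, the blocks will be merged later. */
        printk(XENLOG_WARNING
               "NUMA: node %u (%"PRIx64"-%"PRIx64") overlaps with itself"
               " (%"PRIx64"-%"PRIx64")\n",
               node, start, end,
               node_memblk_range[i].start, node_memblk_range[i].end);
        return 0;
    }

    /* Different node: the firmware-provided topology is invalid. */
    printk(XENLOG_ERR
           "NUMA: node %u (%"PRIx64"-%"PRIx64") overlaps with node %u"
           " (%"PRIx64"-%"PRIx64")\n",
           node, start, end, memblk_nodeid[i],
           node_memblk_range[i].start, node_memblk_range[i].end);
    bad_srat();

    return -EINVAL;
}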


> 
> > +struct node *nd;
> > +paddr_t end;
> > +int i;
> > +
> > +if ( srat_disabled() )
> > +return -EINVAL;
> > +
> > +end = start + size;
> > +if ( num_node_memblks >= NR_NODE_MEMBLKS )
> > +{
> > +dprintk(XENLOG_WARNING,
> > +"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
> > +bad_srat();
> > +return -EINVAL;
> > +}
> > +
> > +/* It is fine to add this area to the nodes data it will be used
> later */
> > +i = conflicting_memblks(start, end);
> > +/* No conflicting memory block, we can save it for later usage */;
> > +if ( i < 0 )
> > +goto save_memblk;
> > +
> > +if ( memblk_nodeid[i] == node ) {
> 
> Xen coding style is using:
> 
> if ( ... )
> {
> 
> Note that I may not comment on all the occurrences, so please check the
> other places.
> 

Ok.


> > +/*
> > + * Overlaps with other memblk in the same node, warning here.
> > + * This memblk will be merged with conflicted memblk later.
> > + */
> > +printk(XENLOG_WARNING
> > +   "DT: NUMA NODE %u (%"PRIx64
> > +   "-%"PRIx64") overlaps with itself (%"PRIx64"-
> %"PRIx64")\n",
> > +   node, start, end,
> > +   node_memblk_range[i].start, node_memblk_range[i].end);
> > +} else {
> > +/*
> > + * Conflict with memblk in other node, this is an error.
> > + * The NUMA information is invalid, NUMA will be turn off.
> > + */
> > +printk(XENLOG_ERR
> > +   "DT: NUMA NODE %u (%"PRIx64"-%"
> > +   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
> > +   node, start, end, memblk_nodeid[i],
> > +   node_memblk_range[i].start, node_memblk_range[i].end);
> > +bad_srat();
> > +return -EINVAL;
> > +}
> > +
> > +save_memblk:
> > +nd = &nodes[node];
> > +if ( !node_test_and_set(node, memory_nodes_parsed) ) {
> > +nd->start = start;
> > +

Re: [XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-25 Thread Julien Grall

Hi Wei,

On 11/08/2021 11:24, Wei Chen wrote:

Memory blocks' NUMA ID information is stored in device tree's
memory nodes as "numa-node-id". We need a new helper to parse
and verify this ID from memory nodes.

In order to support memory affinity in later use, the valid
memory ranges and NUMA ID will be saved to tables.

Signed-off-by: Wei Chen 
---
  xen/arch/arm/numa_device_tree.c | 130 
  1 file changed, 130 insertions(+)

diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
index 37cc56acf3..bbe081dcd1 100644
--- a/xen/arch/arm/numa_device_tree.c
+++ b/xen/arch/arm/numa_device_tree.c
@@ -20,11 +20,13 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  
  s8 device_tree_numa = 0;

  static nodemask_t processor_nodes_parsed __initdata;
+static nodemask_t memory_nodes_parsed __initdata;
  
  static int srat_disabled(void)

  {
@@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t 
node)
  return 0;
  }
  
+/* Callback for parsing of the memory regions affinity */

+static int __init dtb_numa_memory_affinity_init(nodeid_t node,
+paddr_t start, paddr_t size)
+{


The implementation of this function is quite similar to the ACPI
version. Can this be abstracted?



+struct node *nd;
+paddr_t end;
+int i;
+
+if ( srat_disabled() )
+return -EINVAL;
+
+end = start + size;
+if ( num_node_memblks >= NR_NODE_MEMBLKS )
+{
+dprintk(XENLOG_WARNING,
+"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
+bad_srat();
+return -EINVAL;
+}
+
+/* It is fine to add this area to the nodes data it will be used later */
+i = conflicting_memblks(start, end);
+/* No conflicting memory block, we can save it for later usage */;
+if ( i < 0 )
+goto save_memblk;
+
+if ( memblk_nodeid[i] == node ) {


Xen coding style is using:

if ( ... )
{

Note that I may not comment on all the occurrences, so please check the
other places.
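
For example, the first check above would then read:

    if ( memblk_nodeid[i] == node )
    {
        /* ... */
    }
    else
    {
        /* ... */
    }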



+/*
+ * Overlaps with other memblk in the same node, warning here.
+ * This memblk will be merged with conflicted memblk later.
+ */
+printk(XENLOG_WARNING
+   "DT: NUMA NODE %u (%"PRIx64
+   "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n",
+   node, start, end,
+   node_memblk_range[i].start, node_memblk_range[i].end);
+} else {
+/*
+ * Conflict with memblk in other node, this is an error.
+ * The NUMA information is invalid, NUMA will be turn off.
+ */
+printk(XENLOG_ERR
+   "DT: NUMA NODE %u (%"PRIx64"-%"
+   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
+   node, start, end, memblk_nodeid[i],
+   node_memblk_range[i].start, node_memblk_range[i].end);
+bad_srat();
+return -EINVAL;
+}
+
+save_memblk:
+nd = &nodes[node];
+if ( !node_test_and_set(node, memory_nodes_parsed) ) {
+nd->start = start;
+nd->end = end;
+} else {
+if ( start < nd->start )
+nd->start = start;
+if ( nd->end < end )
+nd->end = end;
+}
+
+printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
+   node, start, end);
+
+node_memblk_range[num_node_memblks].start = start;
+node_memblk_range[num_node_memblks].end = end;
+memblk_nodeid[num_node_memblks] = node;
+num_node_memblks++;
+
+return 0;
+}
+
  /* Parse CPU NUMA node info */
  int __init device_tree_parse_numa_cpu_node(const void *fdt, int node)
  {
@@ -70,3 +145,58 @@ int __init device_tree_parse_numa_cpu_node(const void *fdt, 
int node)
  
  return dtb_numa_processor_affinity_init(nid);

  }
+
+/* Parse memory node NUMA info */
+int __init
+device_tree_parse_numa_memory_node(const void *fdt, int node,
+const char *name, uint32_t addr_cells, uint32_t size_cells)


This is pretty much a copy of process_memory_node(). Can we consider
collecting the NUMA ID from there? If not, can we at least abstract the
code?
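
For illustration, a rough sketch of the first idea: let the common
memory-node walk pick up "numa-node-id" while it is already iterating
the "reg" ranges. The function name is made up, the existing bank
registration is elided, and only helpers already used in this patch are
called:

static int __init process_memory_node_numa(const void *fdt, int node,
                                           uint32_t addr_cells,
                                           uint32_t size_cells)
{
    const struct fdt_property *prop;
    const __be32 *addresses;
    uint32_t idx, ranges, nid;
    paddr_t addr, size;
    int len;

    /* A missing/invalid "numa-node-id" simply means no NUMA info here. */
    nid = device_tree_get_u32(fdt, node, "numa-node-id", MAX_NUMNODES);

    prop = fdt_get_property(fdt, node, "reg", &len);
    if ( !prop )
        return -EINVAL;

    addresses = (const __be32 *)prop->data;
    ranges = len / (sizeof(__be32) * (addr_cells + size_cells));

    for ( idx = 0; idx < ranges; idx++ )
    {
        device_tree_get_reg(&addresses, addr_cells, size_cells, &addr, &size);
        if ( !size )
            continue;

        /* ... register the bank for boot allocation, as today ... */

        if ( nid < MAX_NUMNODES )
            dtb_numa_memory_affinity_init(nid, addr, size);
    }

    return 0;
}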



+{
+uint32_t nid;
+int ret = 0, len;
+paddr_t addr, size;
+const struct fdt_property *prop;
+uint32_t idx, ranges;
+const __be32 *addresses;
+
+nid = device_tree_get_u32(fdt, node, "numa-node-id", MAX_NUMNODES);
+if ( nid >= MAX_NUMNODES )
+{
+printk(XENLOG_WARNING "Node id %u exceeds maximum value\n", nid);
+return -EINVAL;
+}
+
+prop = fdt_get_property(fdt, node, "reg", &len);
+if ( !prop )
+{
+printk(XENLOG_WARNING
+   "fdt: node `%s': missing `reg' property\n", name);
+return -EINVAL;
+}
+
+addresses = (const __be32 *)prop->data;
+ranges = len / (sizeof(__be32)* (addr_cells + size_cells));
+for ( idx = 0; idx < ranges; idx++ )
+{
+device_tree_get_reg(&addresses, addr_cells

[XEN RFC PATCH 23/40] xen/arm: introduce a helper to parse device tree memory node

2021-08-11 Thread Wei Chen
Memory blocks' NUMA ID information is stored in device tree's
memory nodes as "numa-node-id". We need a new helper to parse
and verify this ID from memory nodes.

In order to support memory affinity in later use, the valid
memory ranges and NUMA ID will be saved to tables.

Signed-off-by: Wei Chen 
---
 xen/arch/arm/numa_device_tree.c | 130 
 1 file changed, 130 insertions(+)

diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
index 37cc56acf3..bbe081dcd1 100644
--- a/xen/arch/arm/numa_device_tree.c
+++ b/xen/arch/arm/numa_device_tree.c
@@ -20,11 +20,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
 s8 device_tree_numa = 0;
 static nodemask_t processor_nodes_parsed __initdata;
+static nodemask_t memory_nodes_parsed __initdata;
 
 static int srat_disabled(void)
 {
@@ -55,6 +57,79 @@ static int __init dtb_numa_processor_affinity_init(nodeid_t 
node)
 return 0;
 }
 
+/* Callback for parsing of the memory regions affinity */
+static int __init dtb_numa_memory_affinity_init(nodeid_t node,
+paddr_t start, paddr_t size)
+{
+struct node *nd;
+paddr_t end;
+int i;
+
+if ( srat_disabled() )
+return -EINVAL;
+
+end = start + size;
+if ( num_node_memblks >= NR_NODE_MEMBLKS )
+{
+dprintk(XENLOG_WARNING,
+"Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
+bad_srat();
+return -EINVAL;
+}
+
+/* It is fine to add this area to the nodes data it will be used later */
+i = conflicting_memblks(start, end);
+/* No conflicting memory block, we can save it for later usage */;
+if ( i < 0 )
+goto save_memblk;
+
+if ( memblk_nodeid[i] == node ) {
+/*
+ * Overlaps with other memblk in the same node, warning here.
+ * This memblk will be merged with conflicted memblk later.
+ */
+printk(XENLOG_WARNING
+   "DT: NUMA NODE %u (%"PRIx64
+   "-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n",
+   node, start, end,
+   node_memblk_range[i].start, node_memblk_range[i].end);
+} else {
+/*
+ * Conflict with memblk in other node, this is an error.
+ * The NUMA information is invalid, NUMA will be turn off.
+ */
+printk(XENLOG_ERR
+   "DT: NUMA NODE %u (%"PRIx64"-%"
+   PRIx64") overlaps with NODE %u (%"PRIx64"-%"PRIx64")\n",
+   node, start, end, memblk_nodeid[i],
+   node_memblk_range[i].start, node_memblk_range[i].end);
+bad_srat();
+return -EINVAL;
+}
+
+save_memblk:
+nd = &nodes[node];
+if ( !node_test_and_set(node, memory_nodes_parsed) ) {
+nd->start = start;
+nd->end = end;
+} else {
+if ( start < nd->start )
+nd->start = start;
+if ( nd->end < end )
+nd->end = end;
+}
+
+printk(XENLOG_INFO "DT: NUMA node %u %"PRIx64"-%"PRIx64"\n",
+   node, start, end);
+
+node_memblk_range[num_node_memblks].start = start;
+node_memblk_range[num_node_memblks].end = end;
+memblk_nodeid[num_node_memblks] = node;
+num_node_memblks++;
+
+return 0;
+}
+
 /* Parse CPU NUMA node info */
 int __init device_tree_parse_numa_cpu_node(const void *fdt, int node)
 {
@@ -70,3 +145,58 @@ int __init device_tree_parse_numa_cpu_node(const void *fdt, 
int node)
 
 return dtb_numa_processor_affinity_init(nid);
 }
+
+/* Parse memory node NUMA info */
+int __init
+device_tree_parse_numa_memory_node(const void *fdt, int node,
+const char *name, uint32_t addr_cells, uint32_t size_cells)
+{
+uint32_t nid;
+int ret = 0, len;
+paddr_t addr, size;
+const struct fdt_property *prop;
+uint32_t idx, ranges;
+const __be32 *addresses;
+
+nid = device_tree_get_u32(fdt, node, "numa-node-id", MAX_NUMNODES);
+if ( nid >= MAX_NUMNODES )
+{
+printk(XENLOG_WARNING "Node id %u exceeds maximum value\n", nid);
+return -EINVAL;
+}
+
+prop = fdt_get_property(fdt, node, "reg", &len);
+if ( !prop )
+{
+printk(XENLOG_WARNING
+   "fdt: node `%s': missing `reg' property\n", name);
+return -EINVAL;
+}
+
+addresses = (const __be32 *)prop->data;
+ranges = len / (sizeof(__be32)* (addr_cells + size_cells));
+for ( idx = 0; idx < ranges; idx++ )
+{
+device_tree_get_reg(&addresses, addr_cells, size_cells, &addr, &size);
+/* Skip zero size ranges */
+if ( !size )
+continue;
+
+ret = dtb_numa_memory_affinity_init(nid, addr, size);
+if ( ret ) {
+printk(XENLOG_WARNING
+   "NUMA: process range#%d addr = %lx size=%lx failed!\n",
+   idx, addr, size);
+return -EINVAL;
+}
+}
+
+if ( idx == 0 )
+{
+printk(XENLOG_ERR
+