Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-14 Thread Wei Liu
On Tue, Jan 13, 2015 at 08:02:17PM +, Andrew Cooper wrote:
[...]
  +/* vNUMA information */
  +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */
  +uint64_t *vnode_size; /* vnode size array */
 
 Please make it very clear in the comment here that size is in MB (at
 least I presume so, given the shifts by 20).  There are currently no
 specified units.
 

The size will be in pages.

  +unsigned int nr_vnodes;   /* number of elements of above arrays */
  +
   /* kernel loader */
   struct xc_dom_arch *arch_hooks;
   /* allocate up to virt_alloc_end */
  diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
  index bf06fe4..06a7e54 100644
  --- a/tools/libxc/xc_dom_x86.c
  +++ b/tools/libxc/xc_dom_x86.c
  @@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
   int arch_setup_meminit(struct xc_dom_image *dom)
   {
   int rc;
  -xen_pfn_t pfn, allocsz, i, j, mfn;
  +xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
  +int i, j;
   
   rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
   if ( rc )
  @@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom)
   /* setup initial p2m */
   for ( pfn = 0; pfn < dom->total_pages; pfn++ )
   dom->p2m_host[pfn] = pfn;
  -
  +
  +/* Setup dummy vNUMA information if it's not provided. Not
  + * that this is a valid state if libxl doesn't provide any
  + * vNUMA information.
  + *
  + * In this case we setup some dummy value for the convenience
  + * of the allocation code. Note that from the user's PoV the
  + * guest still has no vNUMA configuration.
  + */
  +if ( dom->nr_vnodes == 0 )
  +{
  +dom->nr_vnodes = 1;
  +dom->vnode_to_pnode = xc_dom_malloc(dom,
  +    sizeof(*dom->vnode_to_pnode));
  +dom->vnode_to_pnode[0] = XC_VNUMA_NO_NODE;
  +dom->vnode_size = xc_dom_malloc(dom, sizeof(*dom->vnode_size));
  +dom->vnode_size[0] = (dom->total_pages << PAGE_SHIFT) >> 20;
  +}
  +
  +total = 0;
  +for ( i = 0; i < dom->nr_vnodes; i++ )
  +total += ((dom->vnode_size[i] << 20) >> PAGE_SHIFT);
 
 Can I suggest a mb_to_pages() helper rather than opencoding this in
 several locations.
 

Sure. If I end up needing one I will add that helper.
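
For concreteness, a minimal sketch of what such a helper could look like
(illustrative only; mb_to_pages() is just the suggested name, and PAGE_SHIFT
is assumed to be available as it is elsewhere in libxc):

    /* Convert a size in MB to a number of guest pages. */
    static inline uint64_t mb_to_pages(uint64_t mb)
    {
        return (mb << 20) >> PAGE_SHIFT;
    }

The open-coded (x << 20) >> PAGE_SHIFT expressions below would then read
mb_to_pages(dom->vnode_size[i]) and so on.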

  +if ( total != dom->total_pages )
  +{
  +xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
  + "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n",
  + __FUNCTION__, total, dom->total_pages);
 
 __func__ please.  It is part of C99 unlike __FUNCTION__ which is a gnuism.
 
 andrewcoop:xen.git$ git grep  __FUNCTION__ | wc -l
 230
 andrewcoop:xen.git$ git grep  __func__ | wc -l
 194
 
 Looks like the codebase is very mixed, but best to err on the side of
 the standard.
 

No problem.

  +return -EINVAL;
  +}
  +
   /* allocate guest memory */
  -for ( i = rc = allocsz = 0;
  -  (i < dom->total_pages) && !rc;
  -  i += allocsz )
  +pfn_base = 0;
  +for ( i = 0; i < dom->nr_vnodes; i++ )
   {
  -allocsz = dom->total_pages - i;
  -if ( allocsz > 1024*1024 )
  -allocsz = 1024*1024;
  -rc = xc_domain_populate_physmap_exact(
  -dom->xch, dom->guest_domid, allocsz,
  -0, 0, &dom->p2m_host[i]);
  +unsigned int memflags;
  +uint64_t pages;
  +
  +memflags = 0;
  +if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
  +{
  +memflags |= XENMEMF_exact_node(dom->vnode_to_pnode[i]);
  +memflags |= XENMEMF_exact_node_request;
  +}
  +
  +pages = (dom->vnode_size[i] << 20) >> PAGE_SHIFT;
  +
  +for ( j = 0; j < pages; j += allocsz )
  +{
  +allocsz = pages - j;
  +if ( allocsz > 1024*1024 )
  +allocsz = 1024*1024;
  +
  +rc = xc_domain_populate_physmap_exact(dom->xch,
  + dom->guest_domid, allocsz, 0, memflags,
  + &dom->p2m_host[pfn_base+j]);
  +
  +if ( rc )
  +{
  +if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
  +xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
  + "%s: fail to allocate 0x%"PRIx64"/0x%"PRIpfn" pages (v=%d, p=%d)\n",
  + __FUNCTION__, pages, dom->total_pages, i,
  + dom->vnode_to_pnode[i]);
 
 failed to allocate
 

Fixed.

 I am not sure the total number of pages is useful here, especially as
 you don't know how many pages have successfully been allocated first.

Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-14 Thread Andrew Cooper
On 14/01/15 10:53, Wei Liu wrote:

 +return -EINVAL;
 +}
 +
  /* allocate guest memory */
 -for ( i = rc = allocsz = 0;
 -  (i < dom->total_pages) && !rc;
 -  i += allocsz )
 +pfn_base = 0;
 +for ( i = 0; i < dom->nr_vnodes; i++ )
  {
 -allocsz = dom->total_pages - i;
 -if ( allocsz > 1024*1024 )
 -allocsz = 1024*1024;
 -rc = xc_domain_populate_physmap_exact(
 -dom->xch, dom->guest_domid, allocsz,
 -0, 0, &dom->p2m_host[i]);
 +unsigned int memflags;
 +uint64_t pages;
 +
 +memflags = 0;
 +if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
 +{
 +memflags |= XENMEMF_exact_node(dom->vnode_to_pnode[i]);
 +memflags |= XENMEMF_exact_node_request;
 +}
 +
 +pages = (dom->vnode_size[i] << 20) >> PAGE_SHIFT;
 +
 +for ( j = 0; j < pages; j += allocsz )
 +{
 +allocsz = pages - j;
 +if ( allocsz > 1024*1024 )
 +allocsz = 1024*1024;
 +
 +rc = xc_domain_populate_physmap_exact(dom->xch,
 + dom->guest_domid, allocsz, 0, memflags,
 + &dom->p2m_host[pfn_base+j]);
 +
 +if ( rc )
 +{
 +if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
 +xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
 + "%s: fail to allocate 0x%"PRIx64"/0x%"PRIpfn" pages (v=%d, p=%d)\n",
 + __FUNCTION__, pages, dom->total_pages, i,
 + dom->vnode_to_pnode[i]);
 failed to allocate

 Fixed.

 I am not sure the total number of pages is useful here, especially as
 you don't know how many pages have successfully been allocated first.

 Are you suggesting we don't print any number or print more numbers?

I am not suggesting that you don't print numbers.  pages is useful to
know in the error message, but dom->total_pages is not, as you don't
know how many successful allocations have occurred before, nor the
total memory allocated so far.
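
For instance, a rough sketch of the kind of message meant here, dropping
dom->total_pages and reporting how far this vnode's allocation got instead
(purely illustrative, reusing the patch's variables; not a concrete proposal
for this exact wording):

    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                 "%s: failed to allocate 0x%" PRIx64 " pages for vnode %d"
                 " (pnode %u), 0x%" PRIx64 " of 0x%" PRIx64 " pages done\n",
                 __func__, (uint64_t)allocsz, i,
                 dom->vnode_to_pnode[i], (uint64_t)j, pages);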

~Andrew



Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-13 Thread Ian Jackson
Wei Liu writes ("[PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest"):
...
 diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
 index 07d7224..c459e77 100644
 --- a/tools/libxc/include/xc_dom.h
 +++ b/tools/libxc/include/xc_dom.h
 @@ -167,6 +167,11 @@ struct xc_dom_image {
...
 +/* vNUMA information */
 +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */
 +uint64_t *vnode_size; /* vnode size array */

You don't specify the units.  You should probably name the variable
_bytes or _pages or something.

Looking at the algorithm below it seems to be in _mby.  But the domain
size is specified in pages.  So AFAICT if you try to create a domain
which is not a whole number of megabytes, it is bound to fail!

Perhaps the vnode memory size should be in pages too.

 +unsigned int nr_vnodes;   /* number of elements of above arrays */

Is there some reason to prefer this arrangement with multiple parallel
arrays, to one with a single array of structs ?

 +/* Setup dummy vNUMA information if it's not provided. Not
 + * that this is a valid state if libxl doesn't provide any
 + * vNUMA information.
 + *
 + * In this case we setup some dummy value for the convenience
 + * of the allocation code. Note that from the user's PoV the
 + * guest still has no vNUMA configuration.
 + */

This arrangement for defaulting makes it difficult to supply only
partial information - for example, to supply the number of vnodes but
allow the system to make up the details.

I have a similar complaint about the corresponding libxl code.

I think you should decide where you want the defaulting to be, and do
it in a more flexible way in that one place.  Probably, libxl.

Ian.



Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-13 Thread Wei Liu
On Tue, Jan 13, 2015 at 05:05:26PM +, Ian Jackson wrote:
 Wei Liu writes ("[PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest"):
 ...
  diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
  index 07d7224..c459e77 100644
  --- a/tools/libxc/include/xc_dom.h
  +++ b/tools/libxc/include/xc_dom.h
  @@ -167,6 +167,11 @@ struct xc_dom_image {
 ...
  +/* vNUMA information */
  +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */
  +uint64_t *vnode_size; /* vnode size array */
 
 You don't specify the units.  You should probably name the variable
 _bytes or _pages or something.
 
 Looking at the algorithm below it seems to be in _mby.  But the domain
 size is specified in pages.  So AFAICT if you try to create a domain
 which is not a whole number of megabytes, it is bound to fail!
 
 Perhaps the vnode memory size should be in pages too.
 

Let's use pages as the unit then.
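
For illustration, with a pages-based field the MB round trip disappears
entirely. A sketch only, using a hypothetical field name vnode_size_pages
rather than anything in the posted patch:

    /* Hypothetical: per-vnode sizes carried directly in pages. */
    total = 0;
    for ( i = 0; i < dom->nr_vnodes; i++ )
        total += dom->vnode_size_pages[i];
    if ( total != dom->total_pages )
        return -EINVAL;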

  +unsigned int nr_vnodes;   /* number of elements of above arrays */
 
 Is there some reason to prefer this arrangement with multiple parallel
 arrays, to one with a single array of structs ?
 

No, I don't have a preference. I can pack vnode_to_pnode and
vnode_size(_pages) into a struct.
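
A sketch of what that packed arrangement could look like (type and field
names here are hypothetical, just to illustrate the single-array layout Ian
suggests):

    /* Hypothetical: one entry per virtual node instead of two
     * parallel arrays in struct xc_dom_image. */
    struct xc_vnuma_node {
        unsigned int pnode;   /* physical node backing this vnode */
        uint64_t pages;       /* size of this vnode, in pages */
    };

struct xc_dom_image would then carry a "struct xc_vnuma_node *vnodes" plus
nr_vnodes, and the allocation loop would read dom->vnodes[i].pnode and
dom->vnodes[i].pages.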

  +/* Setup dummy vNUMA information if it's not provided. Not
  + * that this is a valid state if libxl doesn't provide any
  + * vNUMA information.
  + *
  + * In this case we setup some dummy value for the convenience
  + * of the allocation code. Note that from the user's PoV the
  + * guest still has no vNUMA configuration.
  + */
 
 This arrangement for defaulting makes it difficult to supply only
 partial information - for example, to supply the number of vnodes but
 allow the system to make up the details.
 
 I have a similar complaint about the corresponding libxl code.
 
 I think you should decide where you want the defaulting to be, and do
 it in a more flexible way in that one place.  Probably, libxl.
 

The defaulting will be in libxl. That's what Dario is working on.

If libxl provides information, these dummy values will have no effect.

Maybe the comment is confusing. I wasn't saying there is defaulting
happening inside libxc. It's only for the convenience of the allocation
code, because it needs to operate on one mapping.

Wei.

 Ian.



Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-13 Thread Andrew Cooper
On 13/01/15 12:11, Wei Liu wrote:
 From libxc's point of view, it only needs to know vnode to pnode mapping
 and size of each vnode to allocate memory accordingly. Add these fields
 to xc_dom structure.

 The caller might not pass in vNUMA information. In that case, a dummy
 layout is generated for the convenience of libxc's allocation code. The
 upper layer (libxl etc) still sees the domain has no vNUMA
 configuration.

 Signed-off-by: Wei Liu wei.l...@citrix.com
 Cc: Ian Campbell ian.campb...@citrix.com
 Cc: Ian Jackson ian.jack...@eu.citrix.com
 Cc: Dario Faggioli dario.faggi...@citrix.com
 Cc: Elena Ufimtseva ufimts...@gmail.com
 ---
 Changes in v3:
 1. Rewrite commit log.
 2. Shorten some error messages.
 ---
  tools/libxc/include/xc_dom.h |5 +++
  tools/libxc/xc_dom_x86.c |   79 
 --
  tools/libxc/xc_private.h |2 ++
  3 files changed, 75 insertions(+), 11 deletions(-)

 diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
 index 07d7224..c459e77 100644
 --- a/tools/libxc/include/xc_dom.h
 +++ b/tools/libxc/include/xc_dom.h
 @@ -167,6 +167,11 @@ struct xc_dom_image {
  struct xc_dom_loader *kernel_loader;
  void *private_loader;
  
 +/* vNUMA information */
 +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */
 +uint64_t *vnode_size; /* vnode size array */

Please make it very clear in the comment here that size is in MB (at
least I presume so, given the shifts by 20).  There are currently no
specified units.

 +unsigned int nr_vnodes;   /* number of elements of above arrays */
 +
  /* kernel loader */
  struct xc_dom_arch *arch_hooks;
  /* allocate up to virt_alloc_end */
 diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
 index bf06fe4..06a7e54 100644
 --- a/tools/libxc/xc_dom_x86.c
 +++ b/tools/libxc/xc_dom_x86.c
 @@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
  int arch_setup_meminit(struct xc_dom_image *dom)
  {
  int rc;
 -xen_pfn_t pfn, allocsz, i, j, mfn;
 +xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
 +int i, j;
  
  rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
  if ( rc )
 @@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom)
  /* setup initial p2m */
  for ( pfn = 0; pfn < dom->total_pages; pfn++ )
  dom->p2m_host[pfn] = pfn;
 -
 +
 +/* Setup dummy vNUMA information if it's not provided. Not
 + * that this is a valid state if libxl doesn't provide any
 + * vNUMA information.
 + *
 + * In this case we setup some dummy value for the convenience
 + * of the allocation code. Note that from the user's PoV the
 + * guest still has no vNUMA configuration.
 + */
 +if ( dom->nr_vnodes == 0 )
 +{
 +dom->nr_vnodes = 1;
 +dom->vnode_to_pnode = xc_dom_malloc(dom,
 +    sizeof(*dom->vnode_to_pnode));
 +dom->vnode_to_pnode[0] = XC_VNUMA_NO_NODE;
 +dom->vnode_size = xc_dom_malloc(dom, sizeof(*dom->vnode_size));
 +dom->vnode_size[0] = (dom->total_pages << PAGE_SHIFT) >> 20;
 +}
 +
 +total = 0;
 +for ( i = 0; i < dom->nr_vnodes; i++ )
 +total += ((dom->vnode_size[i] << 20) >> PAGE_SHIFT);

Can I suggest a mb_to_pages() helper rather than opencoding this in
several locations.

 +if ( total != dom->total_pages )
 +{
 +xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
 + "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n",
 + __FUNCTION__, total, dom->total_pages);

__func__ please.  It is part of C99 unlike __FUNCTION__ which is a gnuism.

andrewcoop:xen.git$ git grep  __FUNCTION__ | wc -l
230
andrewcoop:xen.git$ git grep  __func__ | wc -l
194

Looks like the codebase is very mixed, but best to err on the side of
the standard.

 +return -EINVAL;
 +}
 +
  /* allocate guest memory */
 -for ( i = rc = allocsz = 0;
 -  (i < dom->total_pages) && !rc;
 -  i += allocsz )
 +pfn_base = 0;
 +for ( i = 0; i < dom->nr_vnodes; i++ )
  {
 -allocsz = dom->total_pages - i;
 -if ( allocsz > 1024*1024 )
 -allocsz = 1024*1024;
 -rc = xc_domain_populate_physmap_exact(
 -dom->xch, dom->guest_domid, allocsz,
 -0, 0, &dom->p2m_host[i]);
 +unsigned int memflags;
 +uint64_t pages;
 +
 +memflags = 0;
 +if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
 +{
 +memflags |= XENMEMF_exact_node(dom->vnode_to_pnode[i]);
 +memflags |= XENMEMF_exact_node_request;
 +}
 +
 +pages = (dom->vnode_size[i] << 20) >> PAGE_SHIFT;
 +
 +for ( j = 0; 

[Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest

2015-01-13 Thread Wei Liu
From libxc's point of view, it only needs to know vnode to pnode mapping
and size of each vnode to allocate memory accordingly. Add these fields
to xc_dom structure.

The caller might not pass in vNUMA information. In that case, a dummy
layout is generated for the convenience of libxc's allocation code. The
upper layer (libxl etc) still sees the domain has no vNUMA
configuration.

Signed-off-by: Wei Liu wei.l...@citrix.com
Cc: Ian Campbell ian.campb...@citrix.com
Cc: Ian Jackson ian.jack...@eu.citrix.com
Cc: Dario Faggioli dario.faggi...@citrix.com
Cc: Elena Ufimtseva ufimts...@gmail.com
---
Changes in v3:
1. Rewrite commit log.
2. Shorten some error messages.
---
 tools/libxc/include/xc_dom.h |5 +++
 tools/libxc/xc_dom_x86.c |   79 --
 tools/libxc/xc_private.h |2 ++
 3 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 07d7224..c459e77 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -167,6 +167,11 @@ struct xc_dom_image {
 struct xc_dom_loader *kernel_loader;
 void *private_loader;
 
+/* vNUMA information */
+unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */
+uint64_t *vnode_size; /* vnode size array */
+unsigned int nr_vnodes;   /* number of elements of above arrays */
+
 /* kernel loader */
 struct xc_dom_arch *arch_hooks;
 /* allocate up to virt_alloc_end */
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index bf06fe4..06a7e54 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
 int arch_setup_meminit(struct xc_dom_image *dom)
 {
 int rc;
-xen_pfn_t pfn, allocsz, i, j, mfn;
+xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
+int i, j;
 
 rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
 if ( rc )
@@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom)
 /* setup initial p2m */
 for ( pfn = 0; pfn < dom->total_pages; pfn++ )
 dom->p2m_host[pfn] = pfn;
-
+
+/* Setup dummy vNUMA information if it's not provided. Not
+ * that this is a valid state if libxl doesn't provide any
+ * vNUMA information.
+ *
+ * In this case we setup some dummy value for the convenience
+ * of the allocation code. Note that from the user's PoV the
+ * guest still has no vNUMA configuration.
+ */
+if ( dom->nr_vnodes == 0 )
+{
+dom->nr_vnodes = 1;
+dom->vnode_to_pnode = xc_dom_malloc(dom,
+sizeof(*dom->vnode_to_pnode));
+dom->vnode_to_pnode[0] = XC_VNUMA_NO_NODE;
+dom->vnode_size = xc_dom_malloc(dom, sizeof(*dom->vnode_size));
+dom->vnode_size[0] = (dom->total_pages << PAGE_SHIFT) >> 20;
+}
+
+total = 0;
+for ( i = 0; i < dom->nr_vnodes; i++ )
+total += ((dom->vnode_size[i] << 20) >> PAGE_SHIFT);
+if ( total != dom->total_pages )
+{
+xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n",
+ __FUNCTION__, total, dom->total_pages);
+return -EINVAL;
+}
+
 /* allocate guest memory */
-for ( i = rc = allocsz = 0;
-  (i < dom->total_pages) && !rc;
-  i += allocsz )
+pfn_base = 0;
+for ( i = 0; i < dom->nr_vnodes; i++ )
 {
-allocsz = dom->total_pages - i;
-if ( allocsz > 1024*1024 )
-allocsz = 1024*1024;
-rc = xc_domain_populate_physmap_exact(
-dom->xch, dom->guest_domid, allocsz,
-0, 0, &dom->p2m_host[i]);
+unsigned int memflags;
+uint64_t pages;
+
+memflags = 0;
+if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
+{
+memflags |= XENMEMF_exact_node(dom->vnode_to_pnode[i]);
+memflags |= XENMEMF_exact_node_request;
+}
+
+pages = (dom->vnode_size[i] << 20) >> PAGE_SHIFT;
+
+for ( j = 0; j < pages; j += allocsz )
+{
+allocsz = pages - j;
+if ( allocsz > 1024*1024 )
+allocsz = 1024*1024;
+
+rc = xc_domain_populate_physmap_exact(dom->xch,
+ dom->guest_domid, allocsz, 0, memflags,
+ &dom->p2m_host[pfn_base+j]);
+
+if ( rc )
+{
+if ( dom->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
+xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: fail to allocate 0x%"PRIx64"/0x%"PRIpfn" pages (v=%d, p=%d)\n",
+