Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
On Tue, Jan 13, 2015 at 08:02:17PM +, Andrew Cooper wrote: [...] +/* vNUMA information */ +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */ +uint64_t *vnode_size; /* vnode size array */ Please make it very clear in the comment here that size is in MB (at least I presume so, given the shifts by 20). There are currently no specified units. The size will be in pages. +unsigned int nr_vnodes; /* number of elements of above arrays */ + /* kernel loader */ struct xc_dom_arch *arch_hooks; /* allocate up to virt_alloc_end */ diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index bf06fe4..06a7e54 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid) int arch_setup_meminit(struct xc_dom_image *dom) { int rc; -xen_pfn_t pfn, allocsz, i, j, mfn; +xen_pfn_t pfn, allocsz, mfn, total, pfn_base; +int i, j; rc = x86_compat(dom-xch, dom-guest_domid, dom-guest_type); if ( rc ) @@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom) /* setup initial p2m */ for ( pfn = 0; pfn dom-total_pages; pfn++ ) dom-p2m_host[pfn] = pfn; - + +/* Setup dummy vNUMA information if it's not provided. Not + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * In this case we setup some dummy value for the convenience + * of the allocation code. Note that from the user's PoV the + * guest still has no vNUMA configuration. + */ +if ( dom-nr_vnodes == 0 ) +{ +dom-nr_vnodes = 1; +dom-vnode_to_pnode = xc_dom_malloc(dom, + sizeof(*dom-vnode_to_pnode)); +dom-vnode_to_pnode[0] = XC_VNUMA_NO_NODE; +dom-vnode_size = xc_dom_malloc(dom, sizeof(*dom-vnode_size)); +dom-vnode_size[0] = (dom-total_pages PAGE_SHIFT) 20; +} + +total = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) +total += ((dom-vnode_size[i] 20) PAGE_SHIFT); Can I suggest a mb_to_pages() helper rather than opencoding this in several locations. Sure. 
If I end up needing one I will add that helper. +if ( total != dom-total_pages ) +{ +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: vNUMA page count mismatch (0x%PRIpfn != 0x%PRIpfn)\n, + __FUNCTION__, total, dom-total_pages); __func__ please. It is part of C99 unlike __FUNCTION__ which is a gnuism. andrewcoop:xen.git$ git grep __FUNCTION__ | wc -l 230 andrewcoop:xen.git$ git grep __func__ | wc -l 194 Looks like the codebase is very mixed, but best to err on the side of the standard. No problem. +return -EINVAL; +} + /* allocate guest memory */ -for ( i = rc = allocsz = 0; - (i dom-total_pages) !rc; - i += allocsz ) +pfn_base = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) { -allocsz = dom-total_pages - i; -if ( allocsz 1024*1024 ) -allocsz = 1024*1024; -rc = xc_domain_populate_physmap_exact( -dom-xch, dom-guest_domid, allocsz, -0, 0, dom-p2m_host[i]); +unsigned int memflags; +uint64_t pages; + +memflags = 0; +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +{ +memflags |= XENMEMF_exact_node(dom-vnode_to_pnode[i]); +memflags |= XENMEMF_exact_node_request; +} + +pages = (dom-vnode_size[i] 20) PAGE_SHIFT; + +for ( j = 0; j pages; j += allocsz ) +{ +allocsz = pages - j; +if ( allocsz 1024*1024 ) +allocsz = 1024*1024; + +rc = xc_domain_populate_physmap_exact(dom-xch, + dom-guest_domid, allocsz, 0, memflags, + dom-p2m_host[pfn_base+j]); + +if ( rc ) +{ +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: fail to allocate 0x%PRIx64/0x%PRIpfn pages (v=%d, p=%d)\n, + __FUNCTION__, pages, dom-total_pages, i, + dom-vnode_to_pnode[i]); failed to allocate Fixed. I am not sure the total number of pages is useful here, especially as you don't know how many pages have successfully
Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
On 14/01/15 10:53, Wei Liu wrote: +return -EINVAL; +} + /* allocate guest memory */ -for ( i = rc = allocsz = 0; - (i dom-total_pages) !rc; - i += allocsz ) +pfn_base = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) { -allocsz = dom-total_pages - i; -if ( allocsz 1024*1024 ) -allocsz = 1024*1024; -rc = xc_domain_populate_physmap_exact( -dom-xch, dom-guest_domid, allocsz, -0, 0, dom-p2m_host[i]); +unsigned int memflags; +uint64_t pages; + +memflags = 0; +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +{ +memflags |= XENMEMF_exact_node(dom-vnode_to_pnode[i]); +memflags |= XENMEMF_exact_node_request; +} + +pages = (dom-vnode_size[i] 20) PAGE_SHIFT; + +for ( j = 0; j pages; j += allocsz ) +{ +allocsz = pages - j; +if ( allocsz 1024*1024 ) +allocsz = 1024*1024; + +rc = xc_domain_populate_physmap_exact(dom-xch, + dom-guest_domid, allocsz, 0, memflags, + dom-p2m_host[pfn_base+j]); + +if ( rc ) +{ +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: fail to allocate 0x%PRIx64/0x%PRIpfn pages (v=%d, p=%d)\n, + __FUNCTION__, pages, dom-total_pages, i, + dom-vnode_to_pnode[i]); failed to allocate Fixed. I am not sure the total number of pages is useful here, especially as you don't know how many pages have successfully been allocated first. Are you suggesting we don't print any number of print more numbers? I am not suggesting that you don't print numbers. pages is useful to know in the error message, but d-total_pages is not, as you don't know how many successful allocations have occurred before, and the total allocated memory so far. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
Wei Liu writes ([PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest): ... diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 07d7224..c459e77 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -167,6 +167,11 @@ struct xc_dom_image { ... +/* vNUMA information */ +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */ +uint64_t *vnode_size; /* vnode size array */ You don't specify the units. You should probably name the variable _bytes or _pages or something. Looking at the algorithm below it seems to be in _mby. But the domain size is specified in pages. So AFAICT if you try to create a domain which is not a whole number of pages, it is bound to fail ! Perhaps the vnode memory size should be in pages too. +unsigned int nr_vnodes; /* number of elements of above arrays */ Is there some reason to prefer this arrangement with multiple parallel arrays, to one with a single array of structs ? +/* Setup dummy vNUMA information if it's not provided. Not + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * In this case we setup some dummy value for the convenience + * of the allocation code. Note that from the user's PoV the + * guest still has no vNUMA configuration. + */ This arrangement for defaulting makes it difficult to supply only partial information - for example, to supply the number of vnodes but allow the system to make up the details. I have a similar complaint about the corresponding libxl code. I think you should decide where you want the defaulting to be, and do it in a more flexible way in that one place. Probably, libxl. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
On Tue, Jan 13, 2015 at 05:05:26PM +, Ian Jackson wrote: Wei Liu writes ([PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest): ... diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 07d7224..c459e77 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -167,6 +167,11 @@ struct xc_dom_image { ... +/* vNUMA information */ +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */ +uint64_t *vnode_size; /* vnode size array */ You don't specify the units. You should probably name the variable _bytes or _pages or something. Looking at the algorithm below it seems to be in _mby. But the domain size is specified in pages. So AFAICT if you try to create a domain which is not a whole number of pages, it is bound to fail ! Perhaps the vnode memory size should be in pages too. Let's use page as unit. +unsigned int nr_vnodes; /* number of elements of above arrays */ Is there some reason to prefer this arrangement with multiple parallel arrays, to one with a single array of structs ? No, I don't have preference. I can pack vnode_to_pnode and vnode_size(_pages) into a struct. +/* Setup dummy vNUMA information if it's not provided. Not + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * In this case we setup some dummy value for the convenience + * of the allocation code. Note that from the user's PoV the + * guest still has no vNUMA configuration. + */ This arrangement for defaulting makes it difficult to supply only partial information - for example, to supply the number of vnodes but allow the system to make up the details. I have a similar complaint about the corresponding libxl code. I think you should decide where you want the defaulting to be, and do it in a more flexible way in that one place. Probably, libxl. The defaulting will be in libxl. That's what Dario is working on. If libxl provides information, these dummy values will have no effect. 
Maybe the comment is confusing. I wasn't saying there's defaulting happening inside libxc. It's only for the convenience of the allocation code, because it needs to operate on one mapping. Wei. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
On 13/01/15 12:11, Wei Liu wrote: From libxc's point of view, it only needs to know vnode to pnode mapping and size of each vnode to allocate memory accordingly. Add these fields to xc_dom structure. The caller might not pass in vNUMA information. In that case, a dummy layout is generated for the convenience of libxc's allocation code. The upper layer (libxl etc) still sees the domain has no vNUMA configuration. Signed-off-by: Wei Liu wei.l...@citrix.com Cc: Ian Campbell ian.campb...@citrix.com Cc: Ian Jackson ian.jack...@eu.citrix.com Cc: Dario Faggioli dario.faggi...@citrix.com Cc: Elena Ufimtseva ufimts...@gmail.com --- Changes in v3: 1. Rewrite commit log. 2. Shorten some error messages. --- tools/libxc/include/xc_dom.h |5 +++ tools/libxc/xc_dom_x86.c | 79 -- tools/libxc/xc_private.h |2 ++ 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 07d7224..c459e77 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -167,6 +167,11 @@ struct xc_dom_image { struct xc_dom_loader *kernel_loader; void *private_loader; +/* vNUMA information */ +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */ +uint64_t *vnode_size; /* vnode size array */ Please make it very clear in the comment here that size is in MB (at least I presume so, given the shifts by 20). There are currently no specified units. 
+unsigned int nr_vnodes; /* number of elements of above arrays */ + /* kernel loader */ struct xc_dom_arch *arch_hooks; /* allocate up to virt_alloc_end */ diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index bf06fe4..06a7e54 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid) int arch_setup_meminit(struct xc_dom_image *dom) { int rc; -xen_pfn_t pfn, allocsz, i, j, mfn; +xen_pfn_t pfn, allocsz, mfn, total, pfn_base; +int i, j; rc = x86_compat(dom-xch, dom-guest_domid, dom-guest_type); if ( rc ) @@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom) /* setup initial p2m */ for ( pfn = 0; pfn dom-total_pages; pfn++ ) dom-p2m_host[pfn] = pfn; - + +/* Setup dummy vNUMA information if it's not provided. Not + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * In this case we setup some dummy value for the convenience + * of the allocation code. Note that from the user's PoV the + * guest still has no vNUMA configuration. + */ +if ( dom-nr_vnodes == 0 ) +{ +dom-nr_vnodes = 1; +dom-vnode_to_pnode = xc_dom_malloc(dom, + sizeof(*dom-vnode_to_pnode)); +dom-vnode_to_pnode[0] = XC_VNUMA_NO_NODE; +dom-vnode_size = xc_dom_malloc(dom, sizeof(*dom-vnode_size)); +dom-vnode_size[0] = (dom-total_pages PAGE_SHIFT) 20; +} + +total = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) +total += ((dom-vnode_size[i] 20) PAGE_SHIFT); Can I suggest a mb_to_pages() helper rather than opencoding this in several locations. +if ( total != dom-total_pages ) +{ +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: vNUMA page count mismatch (0x%PRIpfn != 0x%PRIpfn)\n, + __FUNCTION__, total, dom-total_pages); __func__ please. It is part of C99 unlike __FUNCTION__ which is a gnuism. 
andrewcoop:xen.git$ git grep __FUNCTION__ | wc -l 230 andrewcoop:xen.git$ git grep __func__ | wc -l 194 Looks like the codebase is very mixed, but best to err on the side of the standard. +return -EINVAL; +} + /* allocate guest memory */ -for ( i = rc = allocsz = 0; - (i dom-total_pages) !rc; - i += allocsz ) +pfn_base = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) { -allocsz = dom-total_pages - i; -if ( allocsz 1024*1024 ) -allocsz = 1024*1024; -rc = xc_domain_populate_physmap_exact( -dom-xch, dom-guest_domid, allocsz, -0, 0, dom-p2m_host[i]); +unsigned int memflags; +uint64_t pages; + +memflags = 0; +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +{ +memflags |= XENMEMF_exact_node(dom-vnode_to_pnode[i]); +memflags |= XENMEMF_exact_node_request; +} + +pages = (dom-vnode_size[i] 20) PAGE_SHIFT; + +for ( j = 0;
[Xen-devel] [PATCH v3 03/19] libxc: allocate memory with vNUMA information for PV guest
From libxc's point of view, it only needs to know vnode to pnode mapping and size of each vnode to allocate memory accordingly. Add these fields to xc_dom structure. The caller might not pass in vNUMA information. In that case, a dummy layout is generated for the convenience of libxc's allocation code. The upper layer (libxl etc) still sees the domain has no vNUMA configuration. Signed-off-by: Wei Liu wei.l...@citrix.com Cc: Ian Campbell ian.campb...@citrix.com Cc: Ian Jackson ian.jack...@eu.citrix.com Cc: Dario Faggioli dario.faggi...@citrix.com Cc: Elena Ufimtseva ufimts...@gmail.com --- Changes in v3: 1. Rewrite commit log. 2. Shorten some error messages. --- tools/libxc/include/xc_dom.h |5 +++ tools/libxc/xc_dom_x86.c | 79 -- tools/libxc/xc_private.h |2 ++ 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 07d7224..c459e77 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -167,6 +167,11 @@ struct xc_dom_image { struct xc_dom_loader *kernel_loader; void *private_loader; +/* vNUMA information */ +unsigned int *vnode_to_pnode; /* vnode to pnode mapping array */ +uint64_t *vnode_size; /* vnode size array */ +unsigned int nr_vnodes; /* number of elements of above arrays */ + /* kernel loader */ struct xc_dom_arch *arch_hooks; /* allocate up to virt_alloc_end */ diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index bf06fe4..06a7e54 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -759,7 +759,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid) int arch_setup_meminit(struct xc_dom_image *dom) { int rc; -xen_pfn_t pfn, allocsz, i, j, mfn; +xen_pfn_t pfn, allocsz, mfn, total, pfn_base; +int i, j; rc = x86_compat(dom-xch, dom-guest_domid, dom-guest_type); if ( rc ) @@ -811,18 +812,74 @@ int arch_setup_meminit(struct xc_dom_image *dom) /* setup initial p2m */ for ( pfn = 0; pfn dom-total_pages; pfn++ ) 
dom-p2m_host[pfn] = pfn; - + +/* Setup dummy vNUMA information if it's not provided. Not + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * In this case we setup some dummy value for the convenience + * of the allocation code. Note that from the user's PoV the + * guest still has no vNUMA configuration. + */ +if ( dom-nr_vnodes == 0 ) +{ +dom-nr_vnodes = 1; +dom-vnode_to_pnode = xc_dom_malloc(dom, +sizeof(*dom-vnode_to_pnode)); +dom-vnode_to_pnode[0] = XC_VNUMA_NO_NODE; +dom-vnode_size = xc_dom_malloc(dom, sizeof(*dom-vnode_size)); +dom-vnode_size[0] = (dom-total_pages PAGE_SHIFT) 20; +} + +total = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) +total += ((dom-vnode_size[i] 20) PAGE_SHIFT); +if ( total != dom-total_pages ) +{ +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: vNUMA page count mismatch (0x%PRIpfn != 0x%PRIpfn)\n, + __FUNCTION__, total, dom-total_pages); +return -EINVAL; +} + /* allocate guest memory */ -for ( i = rc = allocsz = 0; - (i dom-total_pages) !rc; - i += allocsz ) +pfn_base = 0; +for ( i = 0; i dom-nr_vnodes; i++ ) { -allocsz = dom-total_pages - i; -if ( allocsz 1024*1024 ) -allocsz = 1024*1024; -rc = xc_domain_populate_physmap_exact( -dom-xch, dom-guest_domid, allocsz, -0, 0, dom-p2m_host[i]); +unsigned int memflags; +uint64_t pages; + +memflags = 0; +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +{ +memflags |= XENMEMF_exact_node(dom-vnode_to_pnode[i]); +memflags |= XENMEMF_exact_node_request; +} + +pages = (dom-vnode_size[i] 20) PAGE_SHIFT; + +for ( j = 0; j pages; j += allocsz ) +{ +allocsz = pages - j; +if ( allocsz 1024*1024 ) +allocsz = 1024*1024; + +rc = xc_domain_populate_physmap_exact(dom-xch, + dom-guest_domid, allocsz, 0, memflags, + dom-p2m_host[pfn_base+j]); + +if ( rc ) +{ +if ( dom-vnode_to_pnode[i] != XC_VNUMA_NO_NODE ) +xc_dom_panic(dom-xch, XC_INTERNAL_ERROR, + %s: fail to allocate 0x%PRIx64/0x%PRIpfn pages (v=%d, p=%d)\n, +