Allow the command XEN_SYSCTL_nvdimm_pmem_setup of hypercall XEN_SYSCTL_nvdimm_op to set up a PMEM region for guest data usage. After the setup, that PMEM region can be mapped into the guest address space.
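
To illustrate the intended call sequence from a toolstack (a sketch only; the MFN values below are made up and error handling is omitted):

    xc_interface *xch = xc_interface_open(NULL, NULL, 0);

    /* Reserve MFNs [0x400000, 0x401000) for management usage. */
    xc_nvdimm_pmem_setup_mgmt(xch, 0x400000, 0x401000);

    /*
     * Set up MFNs [0x401000, 0x441000) (1 GiB) for guest data, managed
     * by a sub-range of the region reserved above. The sub-range must
     * skip the pages already used by the management region itself.
     */
    xc_nvdimm_pmem_setup_data(xch, 0x401000, 0x441000,
                              0x400100, 0x401000);

    xc_interface_close(xch);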
Signed-off-by: Haozhong Zhang <haozhong.zh...@intel.com>
---
Cc: Ian Jackson <ian.jack...@eu.citrix.com>
Cc: Wei Liu <wei.l...@citrix.com>
Cc: Andrew Cooper <andrew.coop...@citrix.com>
Cc: Jan Beulich <jbeul...@suse.com>
---
 tools/libxc/include/xenctrl.h |  22 ++++++++
 tools/libxc/xc_misc.c         |  17 ++++++
 xen/common/pmem.c             | 118 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/public/sysctl.h   |   3 +-
 4 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 7c5707fe11..41e5e3408c 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2621,6 +2621,28 @@ int xc_nvdimm_pmem_get_regions(xc_interface *xch, uint8_t type,
 int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
                               unsigned long smfn, unsigned long emfn);
 
+/*
+ * Set up the specified PMEM pages for guest data usage. On success,
+ * these PMEM pages can be mapped to a guest and used as the backend
+ * of vNVDIMM devices.
+ *
+ * Parameters:
+ *  xch:        xc interface handle
+ *  smfn, emfn: the start and end MFN of the PMEM region
+ *  mgmt_smfn,
+ *  mgmt_emfn:  the start and end MFN of the PMEM region that is used
+ *              to manage this PMEM region. It must be within one of
+ *              the regions added by xc_nvdimm_pmem_setup_mgmt()
+ *              calls, and must not overlap with @smfn - @emfn.
+ *
+ * Return:
+ *  On success, return 0. Otherwise, return a non-zero error code.
+ */
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+                              unsigned long smfn, unsigned long emfn,
+                              unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 3ad254f5ae..ef2e9e0656 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -1019,6 +1019,23 @@ int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
     return rc;
 }
 
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+                              unsigned long smfn, unsigned long emfn,
+                              unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+    DECLARE_SYSCTL;
+    int rc;
+
+    xc_nvdimm_pmem_setup_common(&sysctl, smfn, emfn, mgmt_smfn, mgmt_emfn);
+    sysctl.u.nvdimm.u.pmem_setup.type = PMEM_REGION_TYPE_DATA;
+
+    rc = do_sysctl(xch, &sysctl);
+    if ( rc && sysctl.u.nvdimm.err )
+        rc = -sysctl.u.nvdimm.err;
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/common/pmem.c b/xen/common/pmem.c
index dcd8160407..6891ed7a47 100644
--- a/xen/common/pmem.c
+++ b/xen/common/pmem.c
@@ -34,16 +34,26 @@ static unsigned int nr_raw_regions;
 /*
  * All PMEM regions reserved for management purpose are linked to this
  * list. All of them must be covered by one or multiple PMEM regions
- * in list pmem_raw_regions.
+ * in list pmem_raw_regions, and must not appear in list pmem_data_regions.
  */
 static LIST_HEAD(pmem_mgmt_regions);
 static DEFINE_SPINLOCK(pmem_mgmt_lock);
 static unsigned int nr_mgmt_regions;
 
+/*
+ * All PMEM regions that can be mapped to guests are linked to this
+ * list. All of them must be covered by one or multiple PMEM regions
+ * in list pmem_raw_regions, and must not appear in list pmem_mgmt_regions.
+ */
+static LIST_HEAD(pmem_data_regions);
+static DEFINE_SPINLOCK(pmem_data_lock);
+static unsigned int nr_data_regions;
+
 struct pmem {
     struct list_head link;      /* link to one of PMEM region list */
     unsigned long smfn;         /* start MFN of the PMEM region */
     unsigned long emfn;         /* end MFN of the PMEM region */
+    spinlock_t lock;
 
     union {
         struct {
@@ -53,6 +63,11 @@ struct pmem {
         struct {
             unsigned long used; /* # of used pages in MGMT PMEM region */
         } mgmt;
+
+        struct {
+            unsigned long mgmt_smfn; /* start MFN of management region */
+            unsigned long mgmt_emfn; /* end MFN of management region */
+        } data;
     } u;
 };
 
@@ -111,6 +126,7 @@ static int pmem_list_add(struct list_head *list,
     }
     new_pmem->smfn = smfn;
     new_pmem->emfn = emfn;
+    spin_lock_init(&new_pmem->lock);
     list_add(&new_pmem->link, cur);
 
  out:
@@ -261,9 +277,16 @@ static int pmem_get_regions(xen_sysctl_nvdimm_pmem_regions_t *regions)
 
 static bool check_mgmt_size(unsigned long mgmt_mfns, unsigned long total_mfns)
 {
-    return mgmt_mfns >=
+    unsigned long required =
         ((sizeof(struct page_info) * total_mfns) >> PAGE_SHIFT) +
         ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
+
+    if ( required > mgmt_mfns )
+        printk(XENLOG_DEBUG "PMEM: insufficient management pages, "
+               "0x%lx pages required, 0x%lx pages available\n",
+               required, mgmt_mfns);
+
+    return mgmt_mfns >= required;
 }
 
 static bool check_address_and_pxm(unsigned long smfn, unsigned long emfn,
@@ -341,6 +364,93 @@ static int pmem_setup_mgmt(unsigned long smfn, unsigned long emfn)
     return rc;
 }
 
+static struct pmem *find_mgmt_region(unsigned long smfn, unsigned long emfn)
+{
+    struct list_head *cur;
+
+    ASSERT(spin_is_locked(&pmem_mgmt_lock));
+
+    list_for_each(cur, &pmem_mgmt_regions)
+    {
+        struct pmem *mgmt = list_entry(cur, struct pmem, link);
+
+        if ( smfn >= mgmt->smfn && emfn <= mgmt->emfn )
+            return mgmt;
+    }
+
+    return NULL;
+}
+
+static int pmem_setup_data(unsigned long smfn, unsigned long emfn,
+                           unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+    struct pmem *data, *mgmt = NULL;
+    unsigned long used_mgmt_mfns;
+    unsigned int pxm;
+    int rc;
+
+    if ( smfn == mfn_x(INVALID_MFN) || emfn == mfn_x(INVALID_MFN) ||
+         smfn >= emfn )
+        return -EINVAL;
+
+    /*
+     * Require the PMEM region to be within a single proximity domain,
+     * in order to avoid error recovery from multiple calls to
+     * pmem_arch_setup(), which is not revertible.
+     */
+    if ( !check_address_and_pxm(smfn, emfn, &pxm) )
+        return -EINVAL;
+
+    if ( mgmt_smfn == mfn_x(INVALID_MFN) || mgmt_emfn == mfn_x(INVALID_MFN) ||
+         mgmt_smfn >= mgmt_emfn )
+        return -EINVAL;
+
+    spin_lock(&pmem_mgmt_lock);
+    mgmt = find_mgmt_region(mgmt_smfn, mgmt_emfn);
+    if ( !mgmt )
+    {
+        spin_unlock(&pmem_mgmt_lock);
+        return -ENXIO;
+    }
+    spin_unlock(&pmem_mgmt_lock);
+
+    spin_lock(&mgmt->lock);
+
+    if ( mgmt_smfn < mgmt->smfn + mgmt->u.mgmt.used ||
+         !check_mgmt_size(mgmt_emfn - mgmt_smfn, emfn - smfn) )
+    {
+        spin_unlock(&mgmt->lock);
+        return -ENOSPC;
+    }
+
+    spin_lock(&pmem_data_lock);
+
+    rc = pmem_list_add(&pmem_data_regions, smfn, emfn, &data);
+    if ( rc )
+        goto out;
+    data->u.data.mgmt_smfn = data->u.data.mgmt_emfn = mfn_x(INVALID_MFN);
+
+    rc = pmem_arch_setup(smfn, emfn, pxm,
+                         mgmt_smfn, mgmt_emfn, &used_mgmt_mfns);
+    if ( rc )
+    {
+        pmem_list_del(data);
+        goto out;
+    }
+
+    mgmt->u.mgmt.used = mgmt_smfn - mgmt->smfn + used_mgmt_mfns;
+    data->u.data.mgmt_smfn = mgmt_smfn;
+    data->u.data.mgmt_emfn = mgmt->smfn + mgmt->u.mgmt.used;
+
+    nr_data_regions++;
+
+ out:
+    spin_unlock(&pmem_data_lock);
+    spin_unlock(&mgmt->lock);
+
+    return rc;
+}
+
 static int pmem_setup(unsigned long smfn, unsigned long emfn,
                       unsigned long mgmt_smfn, unsigned long mgmt_emfn,
                       unsigned int type)
@@ -360,6 +470,10 @@ static int pmem_setup(unsigned long smfn, unsigned long emfn,
         break;
 
+    case PMEM_REGION_TYPE_DATA:
+        rc = pmem_setup_data(smfn, emfn, mgmt_smfn, mgmt_emfn);
+        break;
+
     default:
         rc = -EINVAL;
     }
 
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index f825716446..d7c12f23fb 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -1121,6 +1121,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_set_parameter_t);
 /* Types of PMEM regions */
 #define PMEM_REGION_TYPE_RAW        0 /* PMEM regions detected by Xen */
 #define PMEM_REGION_TYPE_MGMT       1 /* PMEM regions for management usage */
+#define PMEM_REGION_TYPE_DATA       2 /* PMEM regions for guest data */
 
 /* PMEM_REGION_TYPE_RAW */
 struct xen_sysctl_nvdimm_pmem_raw_region {
@@ -1176,7 +1177,7 @@ struct xen_sysctl_nvdimm_pmem_setup {
                            /* above PMEM region. If the above PMEM region is */
                            /* a management region, mgmt_{s,e}mfn is required */
                            /* to be identical to {s,e}mfn.                   */
-    uint8_t type;          /* Only PMEM_REGION_TYPE_MGMT is supported now */
+    uint8_t type;          /* Must be one of PMEM_REGION_TYPE_{MGMT, DATA} */
 };
 typedef struct xen_sysctl_nvdimm_pmem_setup xen_sysctl_nvdimm_pmem_setup_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_nvdimm_pmem_setup_t);
-- 
2.14.1
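
As a rough feel for the sizing rule enforced by check_mgmt_size(): assuming 4 KiB pages, sizeof(struct page_info) == 32, and 8-byte M2P entries (typical x86-64 values, used here only as an illustration), a 1 GiB data region (total_mfns = 0x40000) needs

    ((32 * 0x40000) >> PAGE_SHIFT) + ((8 * 0x40000) >> PAGE_SHIFT)
        = 0x800 + 0x200 = 0xa00 pages,

i.e. 10 MiB of management PMEM, roughly 1% of the data region. The management range passed to xc_nvdimm_pmem_setup_data() must be at least that large and must start at or above mgmt->smfn + mgmt->u.mgmt.used; otherwise the hypercall fails with -ENOSPC.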