I hope the following can address all comments below:
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 2f8e590..a4bd2a1 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -407,7 +407,7 @@ int libxl__domain_build(libxl__gc *gc,
switch (info->type) {
case LIBXL_DOMAIN_TYPE_HVM:
- ret = libxl__build_hvm(gc, domid, info, state);
+ ret = libxl__build_hvm(gc, domid, d_config, state);
if (ret)
goto out;
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 634b8d2..ba852fe 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -92,6 +92,276 @@ const char *libxl__domain_device_model(libxl__gc *gc,
return dm;
}
+static int
+libxl__xc_device_get_rdm(libxl__gc *gc,
+ uint32_t flag,
+ uint16_t seg,
+ uint8_t bus,
+ uint8_t devfn,
+ unsigned int *nr_entries,
+ struct xen_reserved_device_memory **xrdm)
+{
+ int rc = 0, r;
+
+ /*
+ * We really can't presume how many entries we can get in advance.
+ */
+ *nr_entries = 0;
+ r = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+ NULL, nr_entries);
+ assert(r <= 0);
+ /* "0" means we have no rdm entries at all. */
+ if (!r) goto out;
+
+ if (errno != ENOBUFS) {
+ rc = ERROR_FAIL;
+ goto out;
+ }
+
+ GCNEW_ARRAY(*xrdm, *nr_entries);
+ r = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+ *xrdm, nr_entries);
+ if (r)
+ rc = ERROR_FAIL;
+
+ out:
+ if (rc) {
+ *nr_entries = 0;
+ *xrdm = NULL;
+ LOG(ERROR, "Could not get reserved device memory maps.\n");
+ }
+ return rc;
+}
+
+/*
+ * Check whether there exists rdm hole in the specified memory range.
+ * Returns true if exists, else returns false.
+ */
+static bool overlaps_rdm(uint64_t start, uint64_t memsize,
+ uint64_t rdm_start, uint64_t rdm_size)
+{
+ return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
+}
+
+static void
+add_rdm_entry(libxl__gc *gc, libxl_domain_config *d_config,
+ uint64_t rdm_start, uint64_t rdm_size, int rdm_policy)
+{
+ assert(d_config->num_rdms);
+
+ d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
+ d_config->num_rdms * sizeof(libxl_device_rdm));
+
+ d_config->rdms[d_config->num_rdms - 1].start = rdm_start;
+ d_config->rdms[d_config->num_rdms - 1].size = rdm_size;
+ d_config->rdms[d_config->num_rdms - 1].policy = rdm_policy;
+}
+
+/*
+ * Check reported RDM regions and handle potential gfn conflicts according
+ * to user preferred policy.
+ *
+ * RDM can theoretically reside in the address space beyond 4G, but we
+ * never see this in the real world. So in order to avoid breaking the
+ * highmem layout we don't solve highmem conflicts. Note this means a
+ * highmem rmrr could still be supported if there is no conflict.
+ *
+ * But in the case of lowmem, RDMs may be scattered across the whole RAM
+ * space. Multiple RDM entries would worsen this further, leading to a
+ * complicated memory layout. It would then be hard to extend
+ * hvm_info_table{} so that hvmloader can work it out. So here we're
+ * trying to figure out a simple solution that avoids breaking the
+ * existing layout. So when a conflict occurs,
+ *
+ * #1. Above a predefined boundary (default 2G)
+ * - Move lowmem_end below reserved region to solve conflict;
+ *
+ * #2. Below a predefined boundary (default 2G)
+ * - Check strict/relaxed policy.
+ * "strict" policy causes libxl to fail.
+ * "relaxed" policy issues a warning message and also marks this entry
+ * INVALID to indicate we shouldn't expose this entry to hvmloader.
+ * Note when both policies are specified on a given region, the per-device
+ * policy should override the global policy.
+ */
+int libxl__domain_device_construct_rdm(libxl__gc *gc,
+ libxl_domain_config *d_config,
+ uint64_t rdm_mem_boundary,
+ struct xc_hvm_build_args *args)
+{
+ int i, j, conflict, rc;
+ struct xen_reserved_device_memory *xrdm = NULL;
+ uint32_t strategy = d_config->b_info.u.hvm.rdm.strategy;
+ uint16_t seg;
+ uint8_t bus, devfn;
+ uint64_t rdm_start, rdm_size;
+ uint64_t highmem_end = args->highmem_end ? args->highmem_end :
(1ull<<32);
+
+ /* Might not expose rdm. */
+ if (strategy == LIBXL_RDM_RESERVE_STRATEGY_IGNORE &&
+ !d_config->num_pcidevs)
+ return 0;
+
+ /* Query all RDM entries in this platform */
+ if (strategy == LIBXL_RDM_RESERVE_STRATEGY_HOST) {
+ unsigned int nr_entries;
+
+ /* Collect all rdm info if any exists. */
+ rc = libxl__xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
+ 0, 0, 0, &nr_entries, &xrdm);
+ if (rc)
+ goto out;
+ if (!nr_entries)
+ return 0;
+
+ assert(xrdm);
+
+ for (i = 0; i < nr_entries; i++)
+ {
+ d_config->num_rdms++;
+ add_rdm_entry(gc, d_config,
+ pfn_to_paddr(xrdm[i].start_pfn),
+ pfn_to_paddr(xrdm[i].nr_pages),
+ d_config->b_info.u.hvm.rdm.policy);
+ }
+ }
+
+ /* Query RDM entries per-device */
+ for (i = 0; i < d_config->num_pcidevs; i++) {
+ unsigned int nr_entries;
+ bool new = true;
+
+ seg = d_config->pcidevs[i].domain;
+ bus = d_config->pcidevs[i].bus;
+ devfn = PCI_DEVFN(d_config->pcidevs[i].dev,
+ d_config->pcidevs[i].func);
+ nr_entries = 0;
+ rc = libxl__xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
+ seg, bus, devfn, &nr_entries, &xrdm);
+ if (rc)
+ goto out;
+ /* No RDM associated with this device. */
+ if (!nr_entries)
+ continue;
+
+ assert(xrdm);
+
+ /*
+ * Need to check whether this entry is already saved in the array.
+ * This could come from two cases:
+ *
+ * - user may configure to get all RDMs in this platform, which
+ * is already queried before this point
+ * - or two assigned devices may share one RDM entry
+ *
+ * Different policies may be configured on the same RDM due to the
+ * above two cases. But we don't allow assigning such a group of
+ * devices right now, so this situation doesn't arise in our case.
+ */
+ for (j = 0; j < d_config->num_rdms; j++) {
+ if (d_config->rdms[j].start == pfn_to_paddr(xrdm[0].start_pfn))
+ {
+ /*
+ * So the per-device policy always override the global
+ * policy in this case.
+ */
+ d_config->rdms[j].policy = d_config->pcidevs[i].rdm_policy;
+ new = false;
+ break;
+ }
+ }
+
+ if (new) {
+ d_config->num_rdms++;
+ add_rdm_entry(gc, d_config,
+ pfn_to_paddr(xrdm[0].start_pfn),
+ pfn_to_paddr(xrdm[0].nr_pages),
+ d_config->pcidevs[i].rdm_policy);
+ }
+ }
+
+ /*
+ * Next step is to check and avoid potential conflict between RDM
+ * entries and guest RAM. To avoid intrusive impact to existing
+ * memory layout {lowmem, mmio, highmem} which is passed around
+ * various function blocks, below conflicts are not handled which
+ * are rare and handling them would lead to a more scattered
+ * layout:
+ * - RDM in highmem area (>4G)
+ * - RDM lower than a defined memory boundary (e.g. 2G)
+ * Otherwise for conflicts between boundary and 4G, we'll simply
+ * move lowmem end below reserved region to solve conflict.
+ *
+ * If a conflict is detected on a given RDM entry, an error will
+ * be returned if 'strict' policy is specified. Instead, if
+ * 'relaxed' policy specified, this conflict is treated just as a
+ * warning, but we mark this RDM entry as INVALID to indicate that
+ * this entry shouldn't be exposed to hvmloader.
+ *
+ * Firstly we should check the case of rdm < 4G because we may
+ * need to expand highmem_end.
+ */
+ for (i = 0; i < d_config->num_rdms; i++) {
+ rdm_start = d_config->rdms[i].start;
+ rdm_size = d_config->rdms[i].size;
+ conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size);
+
+ if (!conflict)
+ continue;
+
+ /* Just check if RDM > our memory boundary. */
+ if (rdm_start > rdm_mem_boundary) {
+ /*
+ * We will move downwards lowmem_end so we have to expand
+ * highmem_end.
+ */
+ highmem_end += (args->lowmem_end - rdm_start);
+ /* Now move downwards lowmem_end. */
+ args->lowmem_end = rdm_start;
+ }
+ }
+
+ /* Sync highmem_end. */
+ args->highmem_end = highmem_end;
+
+ /*
+ * Finally we can take same policy to check lowmem(< 2G) and
+ * highmem adjusted above.
+ */
+ for (i = 0; i < d_config->num_rdms; i++) {
+ rdm_start = d_config->rdms[i].start;
+ rdm_size = d_config->rdms[i].size;
+ /* Does this entry conflict with lowmem? */
+ conflict = overlaps_rdm(0, args->lowmem_end,
+ rdm_start, rdm_size);
+ /* Does this entry conflict with highmem? */
+ conflict |= overlaps_rdm((1ULL<<32),
+ args->highmem_end - (1ULL<<32),
+ rdm_start, rdm_size);
+
+ if (!conflict)
+ continue;
+
+ if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
+ LOG(ERROR, "RDM conflict at 0x%lx.\n",
d_config->rdms[i].start);
+ goto out;
+ } else {
+ LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
+ d_config->rdms[i].start);
+
+ /*
+ * Then mark this entry INVALID to indicate we shouldn't expose
+ * it to hvmloader.
+ */
+ d_config->rdms[i].policy = LIBXL_RDM_RESERVE_POLICY_INVALID;
+ }
+ }
+
+ return 0;
+
+ out:
+ return ERROR_FAIL;
+}
+
const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config
*guest_config)
{
const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4cb247a..7e39047 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -921,13 +921,20 @@ out:
}
int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
- libxl_domain_build_info *info,
+ libxl_domain_config *d_config,
libxl__domain_build_state *state)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
struct xc_hvm_build_args args = {};
int ret, rc = ERROR_FAIL;
uint64_t mmio_start, lowmem_end, highmem_end;
+ libxl_domain_build_info *const info = &d_config->b_info;
+ /*
+ * Currently we fix this at 2G to guarantee our rdm policy can
+ * be handled. Later we'll provide a parameter to set this
+ * dynamically.
+ */
+ uint64_t rdm_mem_boundary = 0x80000000;
memset(&args, 0, sizeof(struct xc_hvm_build_args));
/* The params from the configuration file are in Mb, which are then
@@ -965,6 +972,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
args.highmem_end = highmem_end;
args.mmio_start = mmio_start;
+ rc = libxl__domain_device_construct_rdm(gc, d_config,
+ rdm_mem_boundary,
+ &args);
+ if (rc) {
+ LOG(ERROR, "checking reserved device memory failed");
+ goto out;
+ }
+
if (info->num_vnuma_nodes != 0) {
int i;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 69fdad8..5e4e771 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -144,6 +144,9 @@
#endif
/* all of these macros preserve errno (saving and restoring) */
+/* Convert pfn to physical address space. */
+#define pfn_to_paddr(x) ((uint64_t)(x) << XC_PAGE_SHIFT)
+
/* logging */
_hidden void libxl__logv(libxl_ctx *ctx, xentoollog_level msglevel,
int errnoval,
const char *file /* may be 0 */, int line /* ignored if
!file */,
@@ -1076,7 +1079,7 @@ _hidden int libxl__build_post(libxl__gc *gc,
uint32_t domid,
_hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid,
libxl_domain_build_info *info, libxl__domain_build_state
*state);
_hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
- libxl_domain_build_info *info,
+ libxl_domain_config *d_config,
libxl__domain_build_state *state);
_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
@@ -1584,6 +1587,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
int nr_channels, libxl_device_channel *channels);
/*
+ * This function will fix reserved device memory conflict
+ * according to user's configuration.
+ */
+_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc,
+ libxl_domain_config *d_config,
+ uint64_t rdm_mem_guard,
+ struct xc_hvm_build_args *args);
+
+/*
* This function will cause the whole libxl process to hang
* if the device model does not respond. It is deprecated.
*
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index db9f75a..157fa59 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -586,6 +586,12 @@ libxl_device_pci = Struct("device_pci", [
("rdm_policy", libxl_rdm_reserve_policy),
])
+libxl_device_rdm = Struct("device_rdm", [
+ ("start", uint64),
+ ("size", uint64),
+ ("policy", libxl_rdm_reserve_policy),
+ ])
+
libxl_device_dtdev = Struct("device_dtdev", [
("path", string),
])
@@ -616,6 +622,7 @@ libxl_domain_config = Struct("domain_config", [
("disks", Array(libxl_device_disk, "num_disks")),
("nics", Array(libxl_device_nic, "num_nics")),
("pcidevs", Array(libxl_device_pci, "num_pcidevs")),
+ ("rdms", Array(libxl_device_rdm, "num_rdms")),
("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")),
("vfbs", Array(libxl_device_vfb, "num_vfbs")),
("vkbs", Array(libxl_device_vkb, "num_vkbs")),
--
1.9.1
Thanks
Tiejun
On 2015/7/20 21:32, Ian Jackson wrote:
Tiejun Chen writes ("[v10][PATCH 11/16] tools/libxl: detect and avoid conflicts with
RDM"):
While building a VM, HVM domain builder provides struct hvm_info_table{}
to help hvmloader. Currently it includes two fields to construct guest
e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
check them to fix any conflict with RDM.
...
Thanks. I think this patch is nearly there.
+static int
+libxl__xc_device_get_rdm(libxl__gc *gc,
...
+ *xrdm = libxl__malloc(gc,
+ *nr_entries * sizeof(xen_reserved_device_memory_t));
Sorry for not spotting this before, but this should use GCNEW_ARRAY.
+}
+
+#define pfn_to_paddr(x) ((uint64_t)x << XC_PAGE_SHIFT)
+static void
Missing blank line after the #define - although I think this #define
could profitably be moved to libxl_internal.h. If you do keep it here
please move it further up the file, to at least before any function
or struct definition.
Also the #define is missing safety ( ) around x.
+set_rdm_entries(libxl__gc *gc, libxl_domain_config *d_config,
+ uint64_t rdm_start, uint64_t rdm_size, int rdm_policy,
+ unsigned int nr_entries)
+{
+ assert(nr_entries);
+
+ d_config->num_rdms = nr_entries;
+ d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
+ d_config->num_rdms * sizeof(libxl_device_rdm));
+
+ d_config->rdms[d_config->num_rdms - 1].start = rdm_start;
+ d_config->rdms[d_config->num_rdms - 1].size = rdm_size;
+ d_config->rdms[d_config->num_rdms - 1].policy = rdm_policy;
+}
Thanks for breaking this out. I think though that the division of
labour between this function and the call site is confusing.
Can I suggest a function
void add_rdm_entry(libxl__gc *gc, libxl_domain_config *d_config,
uint64_t rdm_start, uint64_t rdm_size, int rdm_policy)
which assumes that d_config->num_rdms is set correctly, and increments
it ?
(Please put the increment at the end so that the assignments are to
->rdms[d_config->num_rdms], or perhaps make a convenience alias.)
+int libxl__domain_device_construct_rdm(libxl__gc *gc,
+ libxl_domain_config *d_config,
+ uint64_t rdm_mem_boundary,
+ struct xc_hvm_build_args *args)
+{
...
+ /* Query all RDM entries in this platform */
+ if (strategy == LIBXL_RDM_RESERVE_STRATEGY_HOST) {
...
+ } else {
+ d_config->num_rdms = 0;
+ }
Does this not override the domain configuration's num_rdms ? I don't
think that is correct.
If the domain configuration has rdms and num_rdms already set, then
the strategy should presumably be ignored. (Passing the same domain
configuration struct to libxl_domain_create_new, after destroying the
domain, ought to work, even after the first call has modified it.)
Can you please also wrap at least the already-word-wrapped comments to
70 (or maybe 75) columns ? What you have lookes like this when I
quote it for review:
+ * Next step is to check and avoid potential conflict between RDM entri\
es
+ * and guest RAM. To avoid intrusive impact to existing memory layout
+ * {lowmem, mmio, highmem} which is passed around various function bloc\
ks,
+ * below conflicts are not handled which are rare and handling them wou\
ld
Thanks,
Ian.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel