Re: kvm-hint-dedicated requires host CPU passthrough?
Oh, I also see that my claws-mail filters are in chaos and that there's
additional email in this thread.

On Thu, 21 May 2020 16:14:34 +0100 Stefan Hajnoczi wrote:

> On Wed, May 20, 2020 at 07:23:32PM +0200, Paolo Bonzini wrote:
> > On 20/05/20 19:13, Stefan Hajnoczi wrote:
> > > Hi,
> > > libvirt refuses to set KVM_HINTS_DEDICATED when the CPU model is not
> > > host-passthrough.
> > >
> > > Is there a reason for this limitation?
> > >
> > > My understanding is that KVM_HINTS_DEDICATED means the vCPU is pinned to
> > > a host CPU that is not shared with other tasks. Any KVM vCPU should be
> > > able to support this feature, regardless of whether host-passthrough is
> > > used or not.
> >
> > Perhaps it's because KVM_HINTS_DEDICATED enables mwait passthrough, and
> > the details of mwait vary per CPU model?
>
> KVM_HINTS_DEDICATED (now known as KVM_HINTS_REALTIME) is also used by
> cpuidle-haltpoll. In that case we don't care about mwait.
>
> CPU passthrough can prevent live migration, so I think we should relax
> this check, if possible.

I agree, the check for host-passthrough should be relaxed.

> Wim: Can you let us know what the original intention of checking for
> cpu-passthrough was?

From our (my proposed) point of view, it was intended. But that seems
silly indeed.

Rgds,
- Wim10H.

> Thanks,
> Stefan
Re: kvm-hint-dedicated requires host CPU passthrough?
Hi Stefan, sorry for my very late response; I was temporarily out and
unfortunately missed this email thread.

To your question (remark): yes, you're right. This feature should not be
tied to that specific scenario. Something else to look into is that the
feature can only work if the serving layer underneath, by its
capabilities, supports the request.

In short: yes, this needs correction.

- Wim10H.

On Wed, 20 May 2020 18:13:57 +0100 Stefan Hajnoczi wrote:

> Hi,
> libvirt refuses to set KVM_HINTS_DEDICATED when the CPU model is not
> host-passthrough.
>
> Is there a reason for this limitation?
>
> My understanding is that KVM_HINTS_DEDICATED means the vCPU is pinned to
> a host CPU that is not shared with other tasks. Any KVM vCPU should be
> able to support this feature, regardless of whether host-passthrough is
> used or not.
>
> Stefan
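[Editorial aside: for readers following the thread, the domain XML
combination the discussed check applies to looks roughly like the sketch
below. It is a minimal hypothetical fragment assuming the <kvm>
<hint-dedicated/> feature element is the libvirt-level interface that
maps to KVM_HINTS_DEDICATED; the thread itself does not spell this out.]

    <features>
      <kvm>
        <hint-dedicated state='on'/>
      </kvm>
    </features>
    <cpu mode='host-passthrough'/>

Per the first message in the thread, libvirt currently rejects the hint
unless the CPU model is host-passthrough, which is exactly the check
being questioned here.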
[libvirt] [RFC PATCH v1 3/4] qemu: driver changes adding vNUMA memory hotplug support
From: Wim ten Have Add support for hot plugging memory into vNUMA partitioned KVM guests. Hot plugging memory without a target node with result in evenly balancing the added memory along all vNUMA nodes. Signed-off-by: Wim ten Have --- src/qemu/qemu_driver.c | 59 +- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e64afcb8efc9..8d1f0bf13cb7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2348,9 +2348,12 @@ static int qemuDomainSetMemoryFlags(virDomainPtr dom, unsigned long newmem, } if (persistentDef) { -/* resizing memory with NUMA nodes specified doesn't work as there - * is no way to change the individual node sizes with this API */ -if (virDomainNumaGetNodeCount(persistentDef->numa) > 0) { +/* Resizing memory with NUMA nodes specified doesn't work, as there + * is no way to change the individual node sizes with this API, but + * when vNUMA automatic partitioning is in effect resizing is possible. + */ +if (!virDomainVnumaIsEnabled(persistentDef->numa) && +virDomainNumaGetNodeCount(persistentDef->numa) > 0) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("initial memory size of a domain with NUMA " "nodes cannot be modified with this API")); @@ -2365,7 +2368,12 @@ static int qemuDomainSetMemoryFlags(virDomainPtr dom, unsigned long newmem, goto endjob; } -virDomainDefSetMemoryTotal(persistentDef, newmem); +if (virDomainDefSetNUMAMemoryTotal(persistentDef, newmem, driver->caps) < 0) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to distribute newly configured " + "memory across NUMA nodes")); +goto endjob; +} if (persistentDef->mem.cur_balloon > newmem) persistentDef->mem.cur_balloon = newmem; @@ -2378,6 +2386,18 @@ static int qemuDomainSetMemoryFlags(virDomainPtr dom, unsigned long newmem, /* resize the current memory */ unsigned long oldmax = 0; +if ((def && +virDomainVnumaIsEnabled(def->numa) && +virDomainNumaGetNodeCount(def->numa)) || +(persistentDef && +virDomainVnumaIsEnabled(persistentDef->numa) && +virDomainNumaGetNodeCount(persistentDef->numa)) > 0) { +virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("the current memory size of a domain with NUMA " + "nodes cannot be modified with this API")); +goto endjob; +} + if (def) oldmax = virDomainDefGetMemoryTotal(def); if (persistentDef) { @@ -7820,6 +7840,7 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, { int ret = -1; const char *alias = NULL; +virDomainMemoryDefPtr mem; switch ((virDomainDeviceType)dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -7895,8 +7916,34 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, case VIR_DOMAIN_DEVICE_MEMORY: /* note that qemuDomainAttachMemory always consumes dev->data.memory * and dispatches DeviceAdded event on success */ -ret = qemuDomainAttachMemory(driver, vm, - dev->data.memory); + +mem = dev->data.memory; +if (mem->targetNode >= 0) { +ret = qemuDomainAttachMemory(driver, vm, + dev->data.memory); +} else { +size_t i, ncells = virDomainNumaGetNodeCount(vm->def->numa); +unsigned long long memsizeCell = dev->data.memory->size / ncells; + +for (i = 0; i < ncells; i++) { + +if (VIR_ALLOC(mem) < 0) { +ret = -1; +break; +} + +memcpy(mem, dev->data.memory, sizeof(virDomainMemoryDef)); + +if (dev->data.memory->sourceNodes) +virBitmapCopy(mem->sourceNodes, dev->data.memory->sourceNodes); + +mem->size = memsizeCell; +mem->targetNode = i; + +ret = qemuDomainAttachMemory(driver, vm, mem); +} +virDomainMemoryDefFree(dev->data.memory); +} dev->data.memory = NULL; break; -- 2.21.0 -- libvir-list mailing 
list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
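[Editorial aside: the shape of memory-device XML this change targets is a
DIMM without a <node> subelement. A small hypothetical fragment follows
(sizes invented); per the commit message above, such a device would be
split evenly across the guest's vNUMA cells instead of requiring an
explicit target node.]

    <memory model='dimm'>
      <target>
        <size unit='KiB'>1048576</size>
        <!-- no <node> element: with this patch the memory is balanced
             evenly over all vNUMA cells -->
      </target>
    </memory>

Such a fragment would typically be attached at runtime with e.g.
'virsh attach-device <domain> memdev.xml --live'.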
[libvirt] [RFC PATCH v1 2/4] qemu: driver changes adding vNUMA vCPU hotplug support
From: Wim ten Have Add support for hot plugging/unplugging vCPUs in vNUMA partitioned KVM guests. Signed-off-by: Wim ten Have Signed-off-by: Menno Lageman --- src/qemu/qemu_driver.c | 6 ++- src/qemu/qemu_hotplug.c | 95 ++--- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 71947efa4e50..e64afcb8efc9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4965,14 +4965,16 @@ qemuDomainSetVcpusMax(virQEMUDriverPtr driver, return -1; } -if (virDomainNumaGetCPUCountTotal(persistentDef->numa) > nvcpus) { +if (!virDomainVnumaIsEnabled(persistentDef->numa) && +virDomainNumaGetCPUCountTotal(persistentDef->numa) > nvcpus) { virReportError(VIR_ERR_INVALID_ARG, "%s", _("Number of CPUs in exceeds the desired " "maximum vcpu count")); return -1; } -if (virDomainDefGetVcpusTopology(persistentDef, &topologycpus) == 0 && +if (!virDomainVnumaIsEnabled(persistentDef->numa) && +virDomainDefGetVcpusTopology(persistentDef, &topologycpus) == 0 && nvcpus != topologycpus) { /* allow setting a valid vcpu count for the topology so an invalid * setting may be corrected via this API */ diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 2d47f7461f93..2d48c5bba762 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -6081,6 +6081,60 @@ qemuDomainHotplugAddVcpu(virQEMUDriverPtr driver, } +/** + * qemuDomainGetNumaMappedVcpuEntry: + * + * In case of vNUMA guest description we need the node + * mapped vcpu to ensure that guest vcpus are hot-plugged + * or hot-unplugged in a round-robin fashion with whole + * cores on the same NUMA node so they get sibling host + * CPUs. + * + * 2 NUMA node system, 2 threads/core: + * +---+---+---+---+---+---+---+---+---+---+--// + * vcpu | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |... + * +---+---+---+---+---+---+---+---+---+---+--// + * NUMA \--/ \-/ \-/ \-/ \-/ \-// + * node 0 1 0 1 0 ... + * + * bit0 1 0 1 2 3 2 3 4 5 ... + * + * 4 NUMA node system, 2 threads/core: + * +---+---+---+---+---+---+---+---+---+---+--// + * vcpu | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |... + * +---+---+---+---+---+---+---+---+---+---+--// + * NUMA \--/ \-/ \-/ \-/ \-/ \-// + * node 0 1 2 3 0 ... + * + * bit0 1 0 1 0 1 0 1 2 3 ... + * + */ +static ssize_t +qemuDomainGetNumaMappedVcpuEntry(virDomainDefPtr def, + ssize_t vcpu) +{ +virBitmapPtr nodecpumask = NULL; +size_t ncells = virDomainNumaGetNodeCount(def->numa); +size_t threads = def->cpu->threads ? def->cpu->threads : 1; +ssize_t node, bit, pcpu = -1; + +if (!ncells) +return vcpu; + +node = (vcpu / threads) % ncells; +nodecpumask = virDomainNumaGetNodeCpumask(def->numa, node); + +bit = ((vcpu / (threads * ncells)) * threads) + (vcpu % threads); + +while (((pcpu = virBitmapNextSetBit(nodecpumask, pcpu)) >= 0) && bit--); + +/* GIGO: Garbage In? Garbage Out! */ +pcpu = (pcpu < 0) ? 
vcpu : pcpu; + +return pcpu; +} + + /** * qemuDomainSelectHotplugVcpuEntities: * @@ -6104,7 +6158,27 @@ qemuDomainSelectHotplugVcpuEntities(virDomainDefPtr def, qemuDomainVcpuPrivatePtr vcpupriv; unsigned int maxvcpus = virDomainDefGetVcpusMax(def); unsigned int curvcpus = virDomainDefGetVcpus(def); -ssize_t i; +ssize_t i, target; +size_t threads = def->cpu->threads; +size_t nnumaCell = virDomainNumaGetNodeCount(def->numa); +size_t minvcpus = nnumaCell * threads; +bool HasAutonuma = virDomainVnumaIsEnabled(def->numa); + +/* If SMT topology is in place, check that the number of vcpus meets + * the following constraints: + * - at least one fully used core is assigned on each NUMA node + * - cores must be used fully, i.e. all threads of a core are assigned to + * the same guest + */ +if (HasAutonuma && threads && +(nvcpus < minvcpus || (nvcpus - minvcpus) % threads)) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("vNUMA: guest %s configured %d vcpus setting " + "does not fit the vNUMA topology for at " + "least one whole core per vNUMA node."), + def->name, nvcpus); +goto error; +} if (!(ret = virBitmapNew(maxvcpus))) return NULL; @@ -6113,7 +6187,9 @@ qemuDomainSelectHotplugVcpuEntities(virDomainDefPtr def, *enable =
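[Editorial aside: to make the round-robin mapping in the comment block
above concrete, a worked example using the formulas from the posted
qemuDomainGetNumaMappedVcpuEntry(), for the 2-node, 2-threads-per-core
case in the first diagram: for vcpu 4, node = (4 / 2) % 2 = 0 and
bit = ((4 / (2 * 2)) * 2) + (4 % 2) = 2, so vcpu 4 maps to the third set
bit (bit index 2) of node 0's cpumask — exactly the diagram's placement,
where vcpus 4 and 5 form the second whole core on node 0. Likewise, for
vcpu 7, node = (7 / 2) % 2 = 1 and bit = ((7 / 4) * 2) + (7 % 2) = 3,
i.e. the fourth set bit of node 1's cpumask.]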
[libvirt] [RFC PATCH v1 4/4] tests: add various tests to exercise vNUMA host partitioning
From: Wim ten Have Tests for the new element and its variations. Signed-off-by: Wim ten Have --- .../cpu-host-passthrough-nonuma.args | 29 .../cpu-host-passthrough-nonuma.xml | 19 +++ .../cpu-host-passthrough-numa-contiguous.args | 37 ++ .../cpu-host-passthrough-numa-contiguous.xml | 20 +++ .../cpu-host-passthrough-numa-interleave.args | 41 ++ .../cpu-host-passthrough-numa-interleave.xml | 19 +++ ...host-passthrough-numa-node-contiguous.args | 53 ...-host-passthrough-numa-node-contiguous.xml | 21 +++ ...host-passthrough-numa-node-interleave.args | 41 ++ ...-host-passthrough-numa-node-interleave.xml | 22 +++ ...ost-passthrough-numa-node-round-robin.args | 125 ++ ...host-passthrough-numa-node-round-robin.xml | 21 +++ ...u-host-passthrough-numa-node-siblings.args | 32 + ...pu-host-passthrough-numa-node-siblings.xml | 23 ...cpu-host-passthrough-numa-round-robin.args | 37 ++ .../cpu-host-passthrough-numa-round-robin.xml | 22 +++ .../cpu-host-passthrough-numa-siblings.args | 37 ++ .../cpu-host-passthrough-numa-siblings.xml| 20 +++ .../cpu-host-passthrough-numa.args| 37 ++ .../cpu-host-passthrough-numa.xml | 20 +++ tests/qemuxml2argvtest.c | 10 ++ 21 files changed, 686 insertions(+) create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-contiguous.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-contiguous.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-interleave.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-interleave.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-contiguous.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-contiguous.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-interleave.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-interleave.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-round-robin.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-round-robin.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-siblings.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-node-siblings.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-round-robin.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-round-robin.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-siblings.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa-siblings.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa.xml diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args new file mode 100644 index ..197bda882a01 --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args @@ -0,0 +1,29 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-QEMUGuest1 \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest1/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest1/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=kvm,usb=off,dump-guest-core=off \ +-cpu host \ +-m 214 \ +-realtime 
mlock=off \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ +server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-usb \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml new file mode 100644 index ..c7838aed8e12 --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml @@ -0,0 +1,19 @@ + + QEMUGuest1 + c7a5fdbd-edaf-9455-926a-d65c16db1809 + 219100 + 219100 + 1 + +hvm + + + + + destroy + restart + destroy + +/usr/bin/qemu-system-x86_64 + + diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-numa-contiguous.args b/tests/qemuxml2argvdata/cpu-host-passthrough-numa-contiguous.args new file mode 100644 index ..6a59cf7b44e6 --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-numa-contiguous.args @@ -0,0 +1,37 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib
[libvirt] [RFC PATCH v1 0/4] NUMA Host or Node Partitioning
From: Wim ten Have

This patch extends guest domain administration by adding a feature that
creates a guest with a NUMA layout, also referred to as vNUMA (Virtual
NUMA).

NUMA (Non-Uniform Memory Access) is a method of configuring a cluster of
nodes within a single multiprocessing system such that each node shares
its processor-local memory with other nodes, improving performance and
the ability of the system to be expanded.

The illustration below shows a typical 4-node NUMA system. Within this
system, each socket is equipped with its own distinct memory and some
also with I/O. Access to memory or I/O on remote nodes is only possible
by communicating through the "Interconnect."

[ASCII-art figure, garbled by the archive: a typical 4-node NUMA system —
NODE0/Socket0 (CPU00-CPU03), NODE1/Socket1 (CPU04-CPU07), NODE2/Socket2
(CPU08-CPU11) and NODE3/Socket3 (CPU12-CPU15), each socket with its own
local memory and some with I/O, all connected through the "Interconnect".]

Unfortunately, NUMA architectures have some drawbacks. For example, when
data is stored in memory associated with Socket2 but is accessed by a CPU
in Socket0, that CPU uses the interconnect to access the memory
associated with Socket2. These interconnect hops add data access delays.

Some high performance software takes NUMA architecture into account by
carefully placing data in memory and pinning the processes most likely to
access that data to CPUs with the shortest access times. Similarly, such
software can pin its I/O processes to CPUs with the shortest access times
to I/O devices. When such software is run within a guest VM, constructing
the VM such that its virtual NUMA topology mirrors the physical NUMA
topology preserves the application software's performance.

The changes brought by this patch series add a new libvirt domain element
named <vnuma> that allows for dynamic 'host' or 'node' partitioning of a
guest, where libvirt inspects the host capabilities and renders a
best-fitting guest XML definition holding a vNUMA topology that matches
the host.

    <domain>
      ..
      <vnuma mode='host|node'
             distribution='contiguous|siblings|round-robin|interleave'>
        <memory unit='KiB'>524288</memory>
        <partition nodeset="1-4,^3" cells="8"/>
      </vnuma>
      ..
    </domain>

The content of this element causes libvirt to dynamically partition the
guest domain XML into a 'host' or 'node' numa model.

Under <vnuma mode='host'> the guest domain is automatically partitioned
according to the "host" capabilities.

Under <vnuma mode='node'> the guest domain is partitioned according to
the nodeset and cells given under the vnuma <partition> subelement.

The optional attribute distribution='type' indicates how the guest numa
cell cpus are distributed. This distribution='type' can have the
following values:
- 'contiguous' delivery, under which the cpus enumerate sequentially over
  the numa defined cells.
- 'siblings', where cpus are distributed over the numa cells matching the
  host CPU SMT model.
- 'round-robin', where cpus are distributed over the numa cells matching
  the host CPU topology.
- 'interleave', where cpus are interleaved one at a time over the numa
  cells.

The optional subelement <memory> specifies the memory size reserved for
the guest's vNUMA cells to dimension their size. If no <memory> is
specified, the setting is acquired from the guest's total memory
(<memory>) setting.

The optional <partition> subelement is only active when <vnuma
mode='node'> is in effect and allows for defining the active "nodeset"
and "cells" to target under the "guest" domain. For example, the
specified "nodeset" attribute can limit the assigned host NUMA nodes in
effect under the guest with the help of NUMA node tuning (<numatune>).
Alternatively, the provided "cells" attribute can define the number of
guest vNUMA cells to render.

We're planning a 'virsh vnuma' command to convert existing guest domains
to one of these vNUMA models.

Wim ten Have (4):
  XML definitions for guest vNUMA and parsing routines
  qemu: driver changes adding vNUMA vCPU hotplug support
  qemu: driver changes adding vNUMA memory hotplug support
  tests: add various tests to exercise vNUMA host partitioning

do
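[Editorial aside: to illustrate the mode='node' case described above, a
small hypothetical fragment — not taken from the series itself — that
combines the proposed <vnuma> partition with the existing <numatune>
element. Under this sketch the guest would be rendered with two vNUMA
cells and constrained to host NUMA nodes 0-1.]

    <vnuma mode='node' distribution='siblings'>
      <partition nodeset='0-1' cells='2'/>
    </vnuma>
    <numatune>
      <memory mode='strict' nodeset='0-1'/>
    </numatune>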
[libvirt] [RFC PATCH v1 1/4] XML definitions for guest vNUMA and parsing routines
From: Wim ten Have This patch adds XML definitions to a guest with a vNUMA layout and contains routines to parse the same. The guest vNUMA specification looks like: size With mode='host' the guest XML is rendered to match the host's NUMA topology. With mode='node' the guest XML is rendered according to the "nodes" and "cells" attributes of the element. Signed-off-by: Wim ten Have --- docs/formatdomain.html.in | 94 +++ docs/schemas/domaincommon.rng | 65 + src/conf/domain_conf.c| 482 +- src/conf/domain_conf.h| 2 + src/conf/numa_conf.c | 241 - src/conf/numa_conf.h | 58 +++- src/libvirt_private.syms | 8 + 7 files changed, 932 insertions(+), 18 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 962766b792d3..80165f9bd896 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1294,6 +1294,98 @@ +NUMA Host or Node Partitioning + + + With the help of the vnuma element, libvirt can + dynamically partition a guest domain for vNUMA by rendering its XML + into a 'host' or 'node' NUMA + topology matching model. + + + +<domain> + ... + <vnuma mode='host|node' distribution='contiguous|siblings|round-robin|interleave'> +<memory unit='KiB'>524288</memory> +<partition nodeset="1-4,^3" cells="8"/> + </vnuma> + ... +</domain> + + + + vnuma + +The attribute mode selects a specific rendering +method. Its value is either "host" or "node." If mode +is set to "host" the guest domain is automatically partitioned +to match the host NUMA topology. If mode +is "node," the guest domain is partitioned according to the +nodeset and cells under the +vnuma partition subelement. +Since 5.9 + +The optional attribute distribution selects the +guest numa +cell cpus distribution. It allows +Since 5.9 for: + + contiguous + The cpus are enumerate sequentially over the +numa defined +cells. + + siblings + The cpus are distributed over the +numa +cells matching the host CPU SMT model. + + round-robin + The cpus are distributed over the +numa +cells matching the host CPU topology. + + interleave + The cpus are interleaved one at a time over the +numa cells. + + + + + memory + +The optional subelement memory specifies the +memory size reserved for the guest assigned +numa cells. +Since 1.2.11, one can use an additional +unit +attribute to define units in which this memory +size is quantified. If no memory is specified, the +memory setting is +acquired to set this subelement documented +vnuma value. +Since 5.9 + + + partition + +The optional attribute partition is only active when +vnuma +mode "node" is selected and allows for defining the +active "nodeset" and "cells" to target for under the "guest" domain. +For example; the specified nodeset can limit the +numatune assigned +host NUMA nodes in effect under the "guest". Alternatively, +the provided cells attribute can define the number +of numa cells +to render. + +Since 5.9 + + + + NUMA Node Tuning @@ -1755,6 +1847,8 @@ +NUMA topology + Guest NUMA topology can be specified using the numa element. Since 0.9.8 diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index e06f892da393..227c856a362c 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -786,6 +786,10 @@ + + + + @@ -1062,6 +1066,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +host +node + + + + + + + +contiguous +siblings +round-robin +interleave + + + + + + + + + + + + + + + + diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 31
Re: [libvirt] [RFC PATCH auto partition NUMA guest domains v1 0/2] auto partition guests providing the host NUMA topology
On Tue, 25 Sep 2018 14:37:15 +0200 Jiri Denemark wrote:

> On Tue, Sep 25, 2018 at 12:02:40 +0200, Wim Ten Have wrote:
> > From: Wim ten Have
> >
> > This patch extends the guest domain administration adding support
> > to automatically advertise the host NUMA node capabilities obtained
> > architecture under a guest by creating a vNUMA copy.
>
> I'm pretty sure someone would find this useful and such configuration is
> perfectly valid in libvirt. But I don't think there is a compelling
> reason to add some magic into the domain XML which would automatically
> expand to such configuration. It's basically a NUMA placement policy and
> libvirt generally tries to avoid including any kind of policies and
> rather just provide all the mechanisms and knobs which can be used by
> applications to implement any policy they like.
>
> > The mechanism is enabled by setting the check='numa' attribute under
> > the CPU 'host-passthrough' topology:
>
> Anyway, this is definitely not the right place for such option. The
> 'check' attribute is described as
>
>     "Since 3.2.0, an optional check attribute can be used to request a
>     specific way of checking whether the virtual CPU matches the
>     specification."
>
> and the new 'numa' value does not fit in there in any way.
>
> Moreover the code does the automatic NUMA placement at the moment
> libvirt parses the domain XML, which is not the right place since it
> would break migration, snapshots, and save/restore features.

Howdy, thanks for your fast response. I was out of office for a while and
unable to reply earlier.

The beef of this code indeed does not belong under the domain code and
should rather move into the NUMA-specific code; check='numa' is simply a
badly chosen knob. Also, while out of office it occurred to me that,
besides auto-partitioning the host into a vNUMA replica, there is
probably another configuration target we may introduce: reserving a
single NUMA node out of the nodes reserved for a guest to configure. So
my plan is to come back asap with reworked code.

> We have existing placement attributes for vcpu and numatune/memory
> elements which would have been much better place for implementing such
> feature. And even cpu/numa element could have been enhanced to support
> similar configuration.

Going over the libvirt documentation, I am more drawn to the vcpu area.
As said, let me rework and return with a better approach/RFC asap.

Rgds,
- Wim10H.

> Jirka
>
> --
> libvir-list mailing list
> libvir-list@redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC PATCH auto partition NUMA guest domains v1 2/2] qemuxml2argv: add tests that exercise vNUMA auto partition topology
From: Wim ten Have Add tests to ensure that the virDomainNumaAutoconfig() routine to auto- partition vNUMA topology generates correct KVM/QEMU cmdline arguments under applicable setup. Signed-off-by: Wim ten Have --- .../cpu-host-passthrough-nonuma.args | 25 .../cpu-host-passthrough-nonuma.xml | 18 .../cpu-host-passthrough-numa.args| 29 +++ .../cpu-host-passthrough-numa.xml | 18 tests/qemuxml2argvtest.c | 2 ++ 5 files changed, 92 insertions(+) create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa.args create mode 100644 tests/qemuxml2argvdata/cpu-host-passthrough-numa.xml diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args new file mode 100644 index ..4599cdfcc159 --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.args @@ -0,0 +1,25 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=kvm,usb=off,dump-guest-core=off \ +-cpu host \ +-m 214 \ +-smp 1,sockets=1,cores=1,threads=1 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ +server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-usb \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml new file mode 100644 index ..d8daa8c9a43a --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-nonuma.xml @@ -0,0 +1,18 @@ + + QEMUGuest1 + c7a5fdbd-edaf-9455-926a-d65c16db1809 + 219100 + 219100 + 1 + +hvm + + + + destroy + restart + destroy + +/usr/bin/qemu-system-x86_64 + + diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-numa.args b/tests/qemuxml2argvdata/cpu-host-passthrough-numa.args new file mode 100644 index ..5b7b357cc0fe --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-numa.args @@ -0,0 +1,29 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=kvm,usb=off,dump-guest-core=off \ +-cpu host \ +-m 216 \ +-smp 6,sockets=6,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0,cpus=4,mem=54 \ +-numa node,nodeid=1,cpus=1,cpus=5,mem=54 \ +-numa node,nodeid=2,cpus=2,mem=54 \ +-numa node,nodeid=3,cpus=3,mem=54 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\ +server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-usb \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/cpu-host-passthrough-numa.xml b/tests/qemuxml2argvdata/cpu-host-passthrough-numa.xml new file mode 100644 index ..39488521b27d --- /dev/null +++ b/tests/qemuxml2argvdata/cpu-host-passthrough-numa.xml @@ -0,0 +1,18 @@ + + QEMUGuest1 + c7a5fdbd-edaf-9455-926a-d65c16db1809 + 219100 + 219100 + 6 + +hvm + + + + destroy + restart + destroy + +/usr/bin/qemu-system-x86_64 + + diff --git a/tests/qemuxml2argvtest.c 
b/tests/qemuxml2argvtest.c index a7cde3ed7e74..4b6436b32ac7 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1745,6 +1745,8 @@ mymain(void) FLAG_SKIP_LEGACY_CPUS | FLAG_EXPECT_FAILURE, 0, GIC_NONE, NONE); DO_TEST("cpu-host-passthrough", QEMU_CAPS_KVM); +DO_TEST("cpu-host-passthrough-numa", QEMU_CAPS_NUMA); +DO_TEST("cpu-host-passthrough-nonuma", QEMU_CAPS_NUMA); DO_TEST_FAILURE("cpu-qemu-host-passthrough", QEMU_CAPS_KVM); qemuTestSetHostArch(driver.caps, VIR_ARCH_S390X); -- 2.17.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC PATCH auto partition NUMA guest domains v1 1/2] domain: auto partition guests providing the host NUMA topology
From: Wim ten Have Add a mechanism to auto partition the host NUMA topology under the guest domain. This patch adds a framework to automatically partition the host into a small vNUMA subset defined by the guest XML given and description when are in effect and the hypervisor indicates per the host capabilities that a physical NUMA topology is in effect. The mechanism automatically renders the host capabilities provided NUMA architecture, evenly balances the guest reserved vcpu and memory amongst its vNUMA composed cells and have the cell allocated vcpus pinned towards the host NUMA node physical cpusets. This in such way that the host NUMA topology is still in effect under the partitioned guest vNUMA domain. Signed-off-by: Wim ten Have --- docs/formatdomain.html.in | 7 ++ docs/schemas/cputypes.rng | 1 + src/conf/cpu_conf.c | 3 +- src/conf/cpu_conf.h | 1 + src/conf/domain_conf.c| 166 ++ 5 files changed, 177 insertions(+), 1 deletion(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 1f12ab5b4214..ba073d952545 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1500,6 +1500,13 @@ The virtual CPU created by the hypervisor will be checked against the CPU specification and the domain will not be started unless the two CPUs match. + + numa + When the CPU mode='host-passthrough' check='numa' option +combination is set, libvirt auto partitions the guest domain +by rendering the host NUMA architecture. Here the virtual +CPUs and memory are evenly balanced across the defined NUMA +nodes. The vCPUs are also pinned to their physical CPUs. Since 0.9.10, an optional mode diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng index 1f1e0e36d59b..d384d161ee7e 100644 --- a/docs/schemas/cputypes.rng +++ b/docs/schemas/cputypes.rng @@ -29,6 +29,7 @@ none partial full +numa diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c index 863413e75eaa..0d52f6aa4813 100644 --- a/src/conf/cpu_conf.c +++ b/src/conf/cpu_conf.c @@ -52,7 +52,8 @@ VIR_ENUM_IMPL(virCPUCheck, VIR_CPU_CHECK_LAST, "default", "none", "partial", - "full") + "full", + "numa") VIR_ENUM_IMPL(virCPUFallback, VIR_CPU_FALLBACK_LAST, "allow", diff --git a/src/conf/cpu_conf.h b/src/conf/cpu_conf.h index 9f2e7ee2649d..f2e2f0bef3ae 100644 --- a/src/conf/cpu_conf.h +++ b/src/conf/cpu_conf.h @@ -68,6 +68,7 @@ typedef enum { VIR_CPU_CHECK_NONE, VIR_CPU_CHECK_PARTIAL, VIR_CPU_CHECK_FULL, +VIR_CPU_CHECK_NUMA, VIR_CPU_CHECK_LAST } virCPUCheck; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9911d56130a9..c2f9398cfe85 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -1759,6 +1759,168 @@ virDomainDefGetVcpusTopology(const virDomainDef *def, } +/** + * virDomainNumaAutoconfig: auto partition guest vNUMA XML definitions + * taking the machine NUMA topology creating a small guest copy instance. + * @def: domain definition + * @caps: host capabilities + * + * Auto partitioning vNUMA guests is requested under XML configuration + * . Here libvirt takes the + * host NUMA topology, including maxvcpus, online vcpus, memory and + * pinning node cpu's where it renders the guest domain vNUMA topology + * building an architectural copy of the host. + * + * Returns 0 on success and -1 on error. 
+ */ +static int +virDomainNumaAutoconfig(virDomainDefPtr def, +virCapsPtr caps) +{ +int ret = -1; + +if (caps && def->cpu && +def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH && +def->cpu->check == VIR_CPU_CHECK_NUMA) { + +size_t i, cell; +size_t nvcpus = 0; +size_t nnumaCell = 0; +unsigned long long memsizeCell = 0; +virBitmapPtr vnumask = NULL; +virCapsHostPtr host = &caps->host; + +nnumaCell = host->nnumaCell; +if (!nnumaCell) +goto cleanup; + +/* Compute the online vcpus */ +for (i = 0; i < def->maxvcpus; i++) +if (def->vcpus[i]->online) +nvcpus++; + +if (nvcpus < nnumaCell) { +VIR_WARN("vNUMA disabled: %ld vcpus is insufficient " + "to arrange a vNUMA topology for %ld nodes.", + nvcpus, nnumaCell); +goto cleanup; +} + +/* Compute the memory size (memsizeCell) per arranged nnumaCell + */ +if ((memsizeCell = def->mem.total_memory / nnumaCell) == 0) +goto clea
[libvirt] [RFC PATCH auto partition NUMA guest domains v1 0/2] auto partition guests providing the host NUMA topology
From: Wim ten Have This patch extends the guest domain administration adding support to automatically advertise the host NUMA node capabilities obtained architecture under a guest by creating a vNUMA copy. The mechanism is enabled by setting the check='numa' attribute under the CPU 'host-passthrough' topology: When enabled the mechanism automatically renders the host capabilities provided NUMA architecture, evenly balances the guest reserved vcpu and memory amongst its vNUMA composed cells and have the cell allocated vcpus pinned towards the host NUMA node physical cpusets. This in such way that the host NUMA topology is still in effect under the partitioned guest domain. Below example auto partitions the host 'lscpu' listed physical NUMA detail under a guest domain vNUMA description. [root@host ]# lscpu Architecture: x86_64 CPU op-mode(s):32-bit, 64-bit Byte Order:Little Endian CPU(s):240 On-line CPU(s) list: 0-239 Thread(s) per core:2 Core(s) per socket:15 Socket(s): 8 NUMA node(s): 8 Vendor ID: GenuineIntel CPU family:6 Model: 62 Model name:Intel(R) Xeon(R) CPU E7-8895 v2 @ 2.80GHz Stepping: 7 CPU MHz: 3449.555 CPU max MHz: 3600. CPU min MHz: 1200. BogoMIPS: 5586.28 Virtualization:VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 38400K NUMA node0 CPU(s): 0-14,120-134 NUMA node1 CPU(s): 15-29,135-149 NUMA node2 CPU(s): 30-44,150-164 NUMA node3 CPU(s): 45-59,165-179 NUMA node4 CPU(s): 60-74,180-194 NUMA node5 CPU(s): 75-89,195-209 NUMA node6 CPU(s): 90-104,210-224 NUMA node7 CPU(s): 105-119,225-239 Flags: ... The guest 'anuma' without the auto partition rendering enabled reads; "" anuma 3f439f5f-1156-4d48-9491-945a2c0abc6d 67108864 67108864 16 hvm destroy restart destroy /usr/bin/qemu-system-x86_64 Enabling the auto partitioning the guest 'anuma' XML is rewritten as listed below; "" anuma 3f439f5f-1156-4d48-9491-945a2c0abc6d 67108864 67108864 16 hvm destroy restart destroy /usr/bin/qemu-system-x86_64 Finally the auto partitioned guest anuma 'lscpu' listed virtual vNUMA detail. [root@anuma ~]# lscpu Architecture:x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 2 Core(s) per socket: 1 Socket(s): 8 NUMA node(s):8 Vendor ID: GenuineIntel CPU family: 6 Model: 62
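[Editorial aside: the inline domain XML shown in this cover letter lost
its markup in the archive. Based on patch 1/2 of this series, the stanza
that enables the mechanism is, in sketch form, just the check='numa'
attribute on a host-passthrough CPU definition; the rest of the 'anuma'
guest XML (memory, devices, and the rendered <numa> cells) is not
reproduced here.]

    <vcpu>16</vcpu>
    <cpu mode='host-passthrough' check='numa'/>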
[libvirt] [PATCH v2 1/1] storage: extend preallocation flags support for qemu-img
From: Wim ten Have This patch adds support to qcow2 formatted filesystem object storage by instructing qemu-img to build them with preallocation=falloc whenever the XML described storage matches its . For all other cases the filesystem stored objects are built with preallocation=metadata. Signed-off-by: Wim ten Have --- src/conf/storage_conf.c| 3 +++ src/storage/storage_util.c | 10 -- .../qcow2-nocapacity-convert-prealloc.argv | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c index 5036ab9ef..2bd077237 100644 --- a/src/conf/storage_conf.c +++ b/src/conf/storage_conf.c @@ -1184,6 +1184,9 @@ virStorageVolDefParseXML(virStoragePoolDefPtr pool, ret->target.allocation = ret->target.capacity; } +if (ret->target.allocation < ret->target.capacity) +ret->target.sparse = true; + ret->target.path = virXPathString("string(./target/path)", ctxt); if (options->formatFromString) { char *format = virXPathString("string(./target/format/@type)", ctxt); diff --git a/src/storage/storage_util.c b/src/storage/storage_util.c index b4aed0f70..c4f311ded 100644 --- a/src/storage/storage_util.c +++ b/src/storage/storage_util.c @@ -856,6 +856,7 @@ struct _virStorageBackendQemuImgInfo { const char *compat; virBitmapPtr features; bool nocow; +bool sparse; const char *backingPath; int backingFormat; @@ -884,8 +885,12 @@ storageBackendCreateQemuImgOpts(virStorageEncryptionInfoDefPtr enc, virStorageFileFormatTypeToString(info.backingFormat)); if (info.encryption) virBufferAddLit(&buf, "encryption=on,"); -if (info.preallocate) -virBufferAddLit(&buf, "preallocation=metadata,"); +if (info.preallocate) { +if (info.sparse) +virBufferAddLit(&buf, "preallocation=metadata,"); +else +virBufferAddLit(&buf, "preallocation=falloc,"); +} } if (info.nocow) @@ -1187,6 +1192,7 @@ virStorageBackendCreateQemuImgCmdFromVol(virStoragePoolObjPtr pool, .compat = vol->target.compat, .features = vol->target.features, .nocow = vol->target.nocow, +.sparse = vol->target.sparse, .secretPath = secretPath, .secretAlias = NULL, }; diff --git a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv index 9073b1b16..b151b9401 100644 --- a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv +++ b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv @@ -1,4 +1,4 @@ qemu-img convert -f raw -O qcow2 \ --o encryption=on,preallocation=metadata \ +-o encryption=on,preallocation=falloc \ /var/lib/libvirt/images/sparse.img \ /var/lib/libvirt/images/OtherDemo.img -- 2.14.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v1 1/2] storage: extend preallocation flags support for qemu-img
On Wed, 4 Apr 2018 13:31:39 +0200 Michal Privoznik wrote: > On 04/03/2018 04:14 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > This patch adds support to qcow2 formatted filesystem object storage by > > instructing qemu-img to build them with preallocation=falloc whenever the > > XML described storage matches its . For all other > > cases the filesystem stored objects are built with preallocation=metadata. > > > > Signed-off-by: Wim ten Have ... > > VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA = 1 << 0, > > -VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 1, /* perform a btrfs > > lightweight copy */ > > +VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC = 1 << 1, > > +VIR_STORAGE_VOL_CREATE_PREALLOC_FULL = 1 << 2, > > +VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 3, /* perform a btrfs > > lightweight copy */ > > This is not. Imagine there's a mgmt application already written and > compiled which calls: > > virStorageVolCreateXML(flags = VIR_STORAGE_VOL_CREATE_REFLINK); > > Because it is already compiled it is effectively calling: > > virStorageVolCreateXML(flags = 1); > > and everything works. However, if this change would be merged, the mgmt > application would be still making the same call but now it would have > different semantic, because you are changing the numbering. So > effectively mgmt app would be calling > > virStorageVolCreateXML(flags = VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC) > > which is obviously wrong. We can not expect mgmt applications to be > recompiled every time there's a new release of libvirt. ... right, let me send v2. (applying and propagating XML target.sparse) > Also, storagevolxml2argvtest is failing after this change. See 'make check'. ?! ... sure I ran 'make check'. Anyways ... corrections under v2 coming forth. Rgds, - Wim. > Michal > > -- > libvir-list mailing list > libvir-list@redhat.com > https://www.redhat.com/mailman/listinfo/libvir-list -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v2 0/1] Fix qcow2 fully allocated filesystem objects.
From: Wim ten Have

When tools like virt-install request creation of fully allocated
filesystem object storage by setting the parameter sparse=no, libvirt
doesn't allow that to happen for qcow2-formatted files. Regardless of the
XML instruction in the request, libvirt always targets its filesystem
object storage with preallocation=metadata if format=qcow2 is in effect.
This results in sparse files, which could cause problems since the total
image storage can potentially overrun the actual filesystem space
available.

Changes from v1:
 * Fix XML target sparse setting and apply for qcow2.
 * Bring the test change under the same PATCH.

Wim ten Have (1):
  storage: extend preallocation flags support for qemu-img

 src/conf/storage_conf.c| 3 +++
 src/storage/storage_util.c | 10 --
 .../qcow2-nocapacity-convert-prealloc.argv | 2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

-- 
2.14.3

-- 
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
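[Editorial aside: a hypothetical volume definition of the kind this
series is about — capacity equal to allocation, which is what
virt-install's sparse=no ends up requesting. With this patch such a
volume makes qemu-img run with preallocation=falloc rather than
preallocation=metadata; the name and sizes below are invented for the
example.]

    <volume>
      <name>fullalloc.qcow2</name>
      <capacity unit='G'>5</capacity>
      <allocation unit='G'>5</allocation>
      <target>
        <format type='qcow2'/>
      </target>
    </volume>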
[libvirt] [PATCH v1 0/2] Fix qcow2 fully allocated filesystem objects.
From: Wim ten Have

When tools like virt-install request creation of fully allocated
filesystem object storage by setting the parameter sparse=no, libvirt
doesn't allow that to happen for qcow2-formatted files. Regardless of the
XML instruction in the request, libvirt always targets its filesystem
object storage with preallocation=metadata if format=qcow2 is in effect.
This results in sparse files, which could cause problems since the total
image storage can potentially overrun the actual filesystem space
available.

(NOTE: This is a RESEND holding a corrected cover letter and commit
messages / Subject headers.)

Wim ten Have (2):
  storage: extend preallocation flags support for qemu-img
  tests: add qemu-img test for preallocation=falloc

 include/libvirt/libvirt-storage.h | 5 -
 src/storage/storage_util.c | 21 +
 .../qcow2-nocapacity-convert-prealloc.argv | 2 +-
 3 files changed, 22 insertions(+), 6 deletions(-)

-- 
2.14.3

-- 
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1 1/2] storage: extend preallocation flags support for qemu-img
From: Wim ten Have This patch adds support to qcow2 formatted filesystem object storage by instructing qemu-img to build them with preallocation=falloc whenever the XML described storage matches its . For all other cases the filesystem stored objects are built with preallocation=metadata. Signed-off-by: Wim ten Have --- include/libvirt/libvirt-storage.h | 5 - src/storage/storage_util.c| 21 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h index 413d9f6c4..2f22b388c 100644 --- a/include/libvirt/libvirt-storage.h +++ b/include/libvirt/libvirt-storage.h @@ -337,8 +337,11 @@ const char* virStorageVolGetName (virStorageVolPtr vol); const char* virStorageVolGetKey (virStorageVolPtr vol); typedef enum { +VIR_STORAGE_VOL_CREATE_PREALLOC_NONE = 0 << 0, VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA = 1 << 0, -VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 1, /* perform a btrfs lightweight copy */ +VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC = 1 << 1, +VIR_STORAGE_VOL_CREATE_PREALLOC_FULL = 1 << 2, +VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 3, /* perform a btrfs lightweight copy */ } virStorageVolCreateFlags; virStorageVolPtrvirStorageVolCreateXML (virStoragePoolPtr pool, diff --git a/src/storage/storage_util.c b/src/storage/storage_util.c index b4aed0f70..7728fb63e 100644 --- a/src/storage/storage_util.c +++ b/src/storage/storage_util.c @@ -852,7 +852,7 @@ struct _virStorageBackendQemuImgInfo { const char *path; unsigned long long size_arg; bool encryption; -bool preallocate; +unsigned int preallocate; const char *compat; virBitmapPtr features; bool nocow; @@ -884,8 +884,15 @@ storageBackendCreateQemuImgOpts(virStorageEncryptionInfoDefPtr enc, virStorageFileFormatTypeToString(info.backingFormat)); if (info.encryption) virBufferAddLit(&buf, "encryption=on,"); -if (info.preallocate) + +/* Handle various types of file-system storage pre-allocate sets. + */ +if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA) virBufferAddLit(&buf, "preallocation=metadata,"); +else if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC) +virBufferAddLit(&buf, "preallocation=falloc,"); +else if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_FULL) +virBufferAddLit(&buf, "preallocation=full,"); } if (info.nocow) @@ -1183,7 +1190,7 @@ virStorageBackendCreateQemuImgCmdFromVol(virStoragePoolObjPtr pool, .format = vol->target.format, .path = vol->target.path, .encryption = vol->target.encryption != NULL, -.preallocate = !!(flags & VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA), +.preallocate = VIR_STORAGE_VOL_CREATE_PREALLOC_NONE, .compat = vol->target.compat, .features = vol->target.features, .nocow = vol->target.nocow, @@ -1192,7 +1199,13 @@ virStorageBackendCreateQemuImgCmdFromVol(virStoragePoolObjPtr pool, }; virStorageEncryptionInfoDefPtr enc = NULL; -virCheckFlags(VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA, NULL); +if (flags) { +info.preallocate = (vol->target.capacity == vol->target.allocation) ? +VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC : VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA; +virCheckFlags((VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA| + VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC| + VIR_STORAGE_VOL_CREATE_PREALLOC_FULL), NULL); +} /* Treat output block devices as 'raw' format */ if (vol->type == VIR_STORAGE_VOL_BLOCK) -- 2.14.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1 2/2] tests: add qemu-img test for preallocation=falloc
From: Wim ten Have Signed-off-by: Wim ten Have --- tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv index 9073b1b16..b151b9401 100644 --- a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv +++ b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv @@ -1,4 +1,4 @@ qemu-img convert -f raw -O qcow2 \ --o encryption=on,preallocation=metadata \ +-o encryption=on,preallocation=falloc \ /var/lib/libvirt/images/sparse.img \ /var/lib/libvirt/images/OtherDemo.img -- 2.14.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH libvirt sparse files under format=qcow2 v1 1/2] libvirt can't create fully allocated qcow2 formatted storage objects.
From: Wim ten Have This patch adds support to qcow2 formatted storage objects by instructing qemu-img to build them with preallocation=falloc whenever the XML described storage matches its . For all other cases the objects are built with preallocation=metadata. Signed-off-by: Wim ten Have --- include/libvirt/libvirt-storage.h | 5 - src/storage/storage_util.c| 21 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h index 413d9f6c4..2f22b388c 100644 --- a/include/libvirt/libvirt-storage.h +++ b/include/libvirt/libvirt-storage.h @@ -337,8 +337,11 @@ const char* virStorageVolGetName (virStorageVolPtr vol); const char* virStorageVolGetKey (virStorageVolPtr vol); typedef enum { +VIR_STORAGE_VOL_CREATE_PREALLOC_NONE = 0 << 0, VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA = 1 << 0, -VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 1, /* perform a btrfs lightweight copy */ +VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC = 1 << 1, +VIR_STORAGE_VOL_CREATE_PREALLOC_FULL = 1 << 2, +VIR_STORAGE_VOL_CREATE_REFLINK = 1 << 3, /* perform a btrfs lightweight copy */ } virStorageVolCreateFlags; virStorageVolPtrvirStorageVolCreateXML (virStoragePoolPtr pool, diff --git a/src/storage/storage_util.c b/src/storage/storage_util.c index b4aed0f70..7728fb63e 100644 --- a/src/storage/storage_util.c +++ b/src/storage/storage_util.c @@ -852,7 +852,7 @@ struct _virStorageBackendQemuImgInfo { const char *path; unsigned long long size_arg; bool encryption; -bool preallocate; +unsigned int preallocate; const char *compat; virBitmapPtr features; bool nocow; @@ -884,8 +884,15 @@ storageBackendCreateQemuImgOpts(virStorageEncryptionInfoDefPtr enc, virStorageFileFormatTypeToString(info.backingFormat)); if (info.encryption) virBufferAddLit(&buf, "encryption=on,"); -if (info.preallocate) + +/* Handle various types of file-system storage pre-allocate sets. + */ +if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA) virBufferAddLit(&buf, "preallocation=metadata,"); +else if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC) +virBufferAddLit(&buf, "preallocation=falloc,"); +else if (info.preallocate & VIR_STORAGE_VOL_CREATE_PREALLOC_FULL) +virBufferAddLit(&buf, "preallocation=full,"); } if (info.nocow) @@ -1183,7 +1190,7 @@ virStorageBackendCreateQemuImgCmdFromVol(virStoragePoolObjPtr pool, .format = vol->target.format, .path = vol->target.path, .encryption = vol->target.encryption != NULL, -.preallocate = !!(flags & VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA), +.preallocate = VIR_STORAGE_VOL_CREATE_PREALLOC_NONE, .compat = vol->target.compat, .features = vol->target.features, .nocow = vol->target.nocow, @@ -1192,7 +1199,13 @@ virStorageBackendCreateQemuImgCmdFromVol(virStoragePoolObjPtr pool, }; virStorageEncryptionInfoDefPtr enc = NULL; -virCheckFlags(VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA, NULL); +if (flags) { +info.preallocate = (vol->target.capacity == vol->target.allocation) ? +VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC : VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA; +virCheckFlags((VIR_STORAGE_VOL_CREATE_PREALLOC_METADATA| + VIR_STORAGE_VOL_CREATE_PREALLOC_FALLOC| + VIR_STORAGE_VOL_CREATE_PREALLOC_FULL), NULL); +} /* Treat output block devices as 'raw' format */ if (vol->type == VIR_STORAGE_VOL_BLOCK) -- 2.14.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH libvirt sparse files under format=qcow2 v1 2/2] Adjust qemu-img test required to target with preallocation=falloc.
From: Wim ten Have Signed-off-by: Wim ten Have --- tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv index 9073b1b16..b151b9401 100644 --- a/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv +++ b/tests/storagevolxml2argvdata/qcow2-nocapacity-convert-prealloc.argv @@ -1,4 +1,4 @@ qemu-img convert -f raw -O qcow2 \ --o encryption=on,preallocation=metadata \ +-o encryption=on,preallocation=falloc \ /var/lib/libvirt/images/sparse.img \ /var/lib/libvirt/images/OtherDemo.img -- 2.14.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH libvirt sparse files under format=qcow2 v1 0/2] Problem: libvirt fails to create fully allocated qcow2 storage objects.
From: Wim ten Have

When virt-install requests creation of a fully allocated storage object
by setting the parameter sparse=no, libvirt doesn't allow that to happen
for qcow2-formatted files. Regardless of the XML instruction in the
request, libvirt always targets its filesystem object storage with
preallocation=metadata if format=qcow2 is in effect. This results in
sparse files, which could cause problems since the total image storage
can potentially overrun the actual filesystem space available.

Wim ten Have (2):
  libvirt can't create fully allocated qcow2 formatted storage objects.
  Adjust qemu-img test required to target with preallocation=falloc.

 include/libvirt/libvirt-storage.h | 5 -
 src/storage/storage_util.c | 21 +
 .../qcow2-nocapacity-convert-prealloc.argv | 2 +-
 3 files changed, 22 insertions(+), 6 deletions(-)

-- 
2.14.3

-- 
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v6 4/4] xlconfigtest: add tests for numa cell sibling distances
On Fri, 10 Nov 2017 13:59:25 -0700 Jim Fehlig wrote: > On 11/10/2017 01:55 PM, Jim Fehlig wrote: > > On 11/02/2017 09:47 AM, Wim Ten Have wrote: > >> From: Wim ten Have > >> > >> Test a bidirectional xen-xl domxml to and from native for vnuma > >> support administration as brought under this patch series. > >> > >> Added tests for the libxl_domain_config generator determining > >> vnuma conversion for XML-2-json and json-2-XML. > > > > I think these should be split into two patches. Essentially patch 4 in V5 > > plus a > > patch 5 with the new ibxl_domain_config generator test. > > > >> > >> Signed-off-by: Wim ten Have > >> --- > >> Changes on v5: > >> - Added tests for libxl_domain_config generator. > >> - Introduced empty stubs for xc_physinfo(), xc_sharing_freed_pages(), > >>   xc_sharing_used_frames to satisfy libxl_get_physinfo() under test > >>   simulation for libxlxml2domconfigtest.c > >> --- > >>  tests/libxlxml2domconfigdata/basic-hvm.json   | 95 > >> +- > >>  tests/libxlxml2domconfigdata/basic-hvm.xml | 66 ++- > > > > And instead of adding vNUMA config to the _basic_ test files, > > vnuma-hvm.{json,xml} seems more appropriate. I've split this out into a > > patch 5 > > and attached here for completeness. > > > > IMO this series is ready to go in. I'll wait for your ACK on the doc > > changes I > > suggested in patch 1. > > Sorry, I meant to say I'll wait for your feedback on my suggested doc changes > in > patch 1 before pushing this series. Hi Jim, I am good with your suggested changes. So this message is to; ACK Best regards, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] Libvirt config converter can't handle file not ending with new line
On Tue, 7 Nov 2017 12:20:05 + Wei Liu wrote:

> On Mon, Nov 06, 2017 at 09:41:01PM -0700, Jim Fehlig wrote:
> > On 10/30/2017 06:17 AM, Wei Liu wrote:
> > > Hi Jim
> > >
> > > I discover a problem when using xen_xl converter. When the file in
> > > question doesn't end with a new line, I get the following error:
> > >
> > >    error: configuration file syntax error: memory conf:53: expecting a
> > > value
> >
> > I'm not able to reproduce this issue. The libvirt.git tree I tried was a bit
> > dated, but even after updating to latest master I can't reproduce.
> >
> > > After digging a bit (but haven't read libvirt code), it appears that the
> > > file didn't end with a new line.
> >
> > I tried several files without ending new lines, going both directions
> > (domxml-to-native and domxml-from-native), but didn't see the mentioned
> > error. Perhaps your config is revealing another bug which is being
> > improperly reported. Can you provide an example of the problematic config?
>
> I tried to get the exact file that caused the problem but it is already
> destroyed by osstest.
>
> A similar file:
>
> http://logs.test-lab.xenproject.org/osstest/logs/115436/test-amd64-amd64-libvirt-pair/debian.guest.osstest.cfg
>
> If you hexdump -C it, you can see the last character is 0a. Remove it and
> feed the file into the converter.
>
> Wei.

The phenomenon you point out is indeed weird, and my first response is
that this is a bug parsing the cfg input. I did a little exploring and
think that src/util/virconf.c (virConfParseLong(), virConfParseValue())
should be reworked as pointed out in the context diffs below.

git diff
diff --git a/src/util/virconf.c b/src/util/virconf.c
index 39c2bd917..bc8e57ec3 100644
--- a/src/util/virconf.c
+++ b/src/util/virconf.c
@@ -352,7 +352,7 @@ virConfParseLong(virConfParserCtxtPtr ctxt, long long *val)
     } else if (CUR == '+') {
         NEXT;
     }
-    if ((ctxt->cur >= ctxt->end) || (!c_isdigit(CUR))) {
+    if ((ctxt->cur > ctxt->end) || (!c_isdigit(CUR))) {
         virConfError(ctxt, VIR_ERR_CONF_SYNTAX, _("unterminated number"));
         return -1;
     }
@@ -456,7 +456,7 @@ virConfParseValue(virConfParserCtxtPtr ctxt)
     long long l = 0;

     SKIP_BLANKS;
-    if (ctxt->cur >= ctxt->end) {
+    if (ctxt->cur > ctxt->end) {
         virConfError(ctxt, VIR_ERR_CONF_SYNTAX, _("expecting a value"));
         return NULL;
     }

I did not go beyond this yet.

Rgds,
- Wim.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v5 4/4] xlconfigtest: add tests for numa cell sibling distances
On Wed, 1 Nov 2017 20:45:46 +0100 Wim ten Have wrote: > On Thu, 26 Oct 2017 17:51:34 -0600 > Jim Fehlig wrote: > > > On 10/12/2017 01:31 PM, Wim Ten Have wrote: > > > From: Wim ten Have > > > > > > Test a bidirectional xen-xl domxml to and from native for numa > > > support administration as brought under this patch series. > > > > > > Signed-off-by: Wim ten Have > > > --- > > > .../test-fullvirt-vnuma-autocomplete.cfg | 26 +++ > > > .../test-fullvirt-vnuma-autocomplete.xml | 85 > > > ++ > > > .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ > > > .../test-fullvirt-vnuma-nodistances.xml| 53 ++ > > > .../test-fullvirt-vnuma-partialdist.cfg| 26 +++ > > > .../test-fullvirt-vnuma-partialdist.xml| 60 +++ > > > tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ > > > tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 > > > + > > > tests/xlconfigtest.c | 6 ++ > > > > Cool! Thanks for the various configurations to test all the hairy parsing > > code :-). > > > > Reviewed-by: Jim Fehlig > > > > BTW I should have mentioned it while reviewing 3/4, but we should also have > > tests for the libxl_domain_config generator, now that we have a test suite > > for > > that. See tests/libxlxml2domconfigtest.c. > > There's a nasty issue living here. Within specific test-harness you seem to > get through libxl_ctx_alloc() ... ?! Running as ordinary user without > privileges?! (ie. not root) > > if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, log) < 0) { > > I'd expected this libxl_ctx_alloc() to have failed with; > xencall: error: Could not obtain handle on privileged command > interface: Permission denied > libxl: error: libxl.c:108:libxl_ctx_alloc: cannot open libxc handle: > Permission denied > > Funny it seems to go through and the padded memory content seem not to > match a > real libxl_ctx_alloc(). So it is bringing some kind of ctx structure which > seems _NOT_ real or > at least incorrect. > > From I need specific context to determine host/hypervisor phys_info() ... > causing > my libxl_domain_config added test to die with a SEGV under specific > xenlight run-time. > > (gdb) where > #0 0x72e18f41 in xc.hypercall_buffer_alloc () from > /lib64/libxenctrl.so.4.8 > #1 0x72e18f98 in xc.hypercall_bounce_pre () from > /lib64/libxenctrl.so.4.8 > #2 0x72e0b962 in xc_physinfo () from /lib64/libxenctrl.so.4.8 > #3 0x7792c0bb in libxl_get_physinfo () from > /lib64/libxenlight.so.4.8 > #4 0x00414012 in libxlMakeVnumaList (def=0x66f720, ctx=0x668060, > d_config=0x7fffd170) > at libxl/libxl_conf.c:621 > #5 0x00418ca6 in libxlBuildDomainConfig (graphicsports=0x666940, > def=0x66f720, channelDir=0x0, > ctx=0x668060, caps=0x669ee0, d_config=0x7fffd170) at > libxl/libxl_conf.c:2302 > #6 0x0040f536 in testCompareXMLToDomConfig ( > xmlfile=0x666860 > "/home/wtenhave/WORK/libvirt/vNUMA/libvirt/tests/libxlxml2domconfigdata/basic-hvm.xml", > > jsonfile=0x666790 > "/home/wtenhave/WORK/libvirt/vNUMA/libvirt/tests/libxlxml2domconfigdata/basic-hvm.json") > at libxlxml2domconfigtest.c:88 > #7 0x0040f851 in testCompareXMLToDomConfigHelper (data=0x6482f0 > ) at libxlxml2domconfigtest.c:148 > > Should I escape the libxl_get_physinfo() under test? It is not really > necessary there as whole > is academic. If you have an advise please let me know ... . Not a nasty issue and resolved under PATCH v6 submitted today. Regards. - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v6 3/4] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 20 for remote nodes/sockets." Signed-off-by: Wim ten Have --- Changes on v1: - Fix function calling (coding) standards. - Use virReportError(...) under all failing circumstances. - Reduce redundant (format->parse) code sorting bitmaps. - Avoid non GNU shorthand notations, difficult code practice. Changes on v2: - Have autonomous defaults applied from virDomainNumaGetNodeDistance. - Automatically make Xen driver simulate vnuma pnodes on non NUMA h/w. - Compute 'memory unit=' making it the sum of all node memory. Changes on v4: - Escape virDomainNumaGetNodeCount() if effective. Changes on v5: - User VIR_WARN to warn instead of virReportError() abuse without error. - Have ACPI set by libxlDomainDefPostParse() when virDomainNumaGetNodeCount suggests such for x86_64 under HVM. - Remove VIR_DOMAIN_DEF_PARSE_ABI_UPDATE. --- src/libxl/libxl_conf.c | 119 +++ src/libxl/libxl_domain.c | 7 ++- 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index ecbabfc79..c0dd634cd 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -593,6 +593,120 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList(virDomainDefPtr def, + libxl_ctx *ctx, + libxl_domain_config *d_config) +{ +int ret = -1; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +bool simulate = false; +virBitmapPtr bitmap = NULL; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libxl_get_physinfo_info failed")); +return -1; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +VIR_WARN("Number of configured numa cells %zu exceeds the physical available nodes %zu, guest simulates numa", + num_vnuma, nr_nodes); +simulate = true; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +return -1; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = simulate ? 
0 : i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +bitmap = virDomainNumaGetNodeCpumask(numa, i); +if (bitmap == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) +goto cleanup; + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +for (j = 0; j < num_vnuma; j++) +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j); +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) { +for (i = 0; i < num_vnuma; i++) { +libxl_vnode_info *p = &vnuma_nodes[i]; + +VIR_FREE(p->distances); +} +VIR_FREE(vnuma_nodes); +} + +return ret; +} +#endif + static int libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) { @@ -2184,6 +2298,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr gr
[libvirt] [PATCH v6 1/4] numa: describe siblings distances within cells
From: Wim ten Have Add support for describing NUMA distances in a domain's XML description. Below is an example of a 4 node setup: A defines a NUMA node. describes the NUMA distance from the to the other NUMA nodes (the s). For example, in above XML description, the distance between NUMA node0 and NUMA node2 is 31. Valid distance value are '10 <= value <= 255'. A distance value of 10 represents the distance to the node itself. A distance value of 20 represents the default value for remote nodes but other values are possible depending on the physical topology of the system. When distances are not fully described, any missing sibling distance values will default to 10 for local nodes and 20 for remote nodes. If distance is given for A -> B, then we default B -> A to the same value instead of 20. Signed-off-by: Wim ten Have Reviewed-by: Daniel P. Berrange --- Changes on v1: - Add changes to docs/formatdomain.html.in describing schema update. Changes on v2: - Automatically apply distance symmetry maintaining cell <-> sibling. - Check for maximum '255' on numaDistanceValue. - Automatically complete empty distance ranges. - Check that sibling_id's are in range with cell identifiers. - Allow non-contiguous ranges, starting from any node id. - Respect parameters as ATTRIBUTE_NONNULL fix functions and callers. - Add and apply topology for LOCAL_DISTANCE=10 and REMOTE_DISTANCE=20. Changes on v3: - Add UNREACHABLE if one locality is unreachable from another. - Add code cleanup aligning function naming in a separated patch. - Add numa related driver code in a separated patch. - Remove from numaDistanceValue schema/basictypes.rng - Correct doc changes. Changes on v4: - Fix symmetry error under virDomainNumaDefNodeDistanceParseXML() Changes on v5: - Apply doc suggestions. - Apply virReportError() message suggestions. - Remove redundant sanity checks from virDomainNumaDefNodeDistanceParseXML(). --- docs/formatdomain.html.in | 63 ++- docs/schemas/basictypes.rng | 7 ++ docs/schemas/cputypes.rng | 18 + src/conf/numa_conf.c| 191 +++- 4 files changed, 275 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 4f28dce35..df2f17f5d 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1529,7 +1529,68 @@ - This guest NUMA specification is currently available only for QEMU/KVM. + This guest NUMA specification is currently available only for + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct + description of NUMA arranged sibling cell + distances Since 3.9.0. + + + + Under NUMA hardware architecture, distinct resources such as memory + create a designated distance between cell and + siblings that now can be described with the help of + distances. A detailed description can be found within + the ACPI (Advanced Configuration and Power Interface Specification) + within the chapter explaining the system's SLIT (System Locality + Distance Information Table). + + + +... +<cpu> + ... 
+ <numa> +<cell id='0' cpus='0,4-7' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='10'/> +<sibling id='1' value='21'/> +<sibling id='2' value='31'/> +<sibling id='3' value='41'/> + </distances> +</cell> +<cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='21'/> +<sibling id='1' value='10'/> +<sibling id='2' value='21'/> +<sibling id='3' value='31'/> + </distances> +</cell> +<cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='31'/> +<sibling id='1' value='21'/> +<sibling id='2' value='10'/> +<sibling id='3' value='21'/> + </distances> +</cell> +<cell id='3' cpus='3' memory='512000
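The completion rule described in the commit message above (missing values default to 10 for the local node and 20 for remote nodes, and a configured A -> B distance is mirrored into a missing B -> A slot) can be sketched as a small standalone C program. This is an illustration of the rule only, not the code added to numa_conf.c; LOCAL_DISTANCE and REMOTE_DISTANCE are used here as plain constants.

#include <stdio.h>

#define LOCAL_DISTANCE  10
#define REMOTE_DISTANCE 20
#define NODES 4

/* Fill the gaps in a partially configured distance matrix (0 = not set). */
static void complete_distances(unsigned d[NODES][NODES])
{
    size_t i, j;

    for (i = 0; i < NODES; i++) {
        for (j = 0; j < NODES; j++) {
            if (d[i][j])                    /* explicitly configured */
                continue;
            if (i == j)
                d[i][j] = LOCAL_DISTANCE;   /* node to itself */
            else if (d[j][i])
                d[i][j] = d[j][i];          /* mirror the sibling's value */
            else
                d[i][j] = REMOTE_DISTANCE;  /* plain remote default */
        }
    }
}

int main(void)
{
    /* Only node0's distances are given, as in a partial distances config. */
    unsigned d[NODES][NODES] = { { 10, 21, 31, 41 } };
    size_t i, j;

    complete_distances(d);
    for (i = 0; i < NODES; i++) {
        for (j = 0; j < NODES; j++)
            printf(" %3u", d[i][j]);
        printf("\n");
    }
    return 0;
}

For the partially specified row above this prints a symmetric matrix whose first row and column keep 10, 21, 31, 41 while every other off-diagonal entry becomes 20, matching the documented defaulting behaviour.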
[libvirt] [PATCH v6 0/4] numa: describe sibling nodes distances
From: Wim ten Have This patch extends guest domain administration adding support to advertise node sibling distances when configuring NUMA guests also referred to as vNUMA (Virtual NUMA). NUMA (Non-Uniform Memory Access), a method of configuring a cluster of nodes within a single multiprocessing system such that it shares processor local memory amongst others improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-NODE system, every socket is equipped with its own distinct memory and some with I/O. Access to memory or I/O on remote nodes is only possible through the "Interconnect". This results in different performance for local and remote resources. In contrast to NUMA we recognize the flat SMP system where no concept of local or remote resource exists. The disadvantage of high socket count SMP systems is that the shared bus can easily become a performance bottleneck under high activity. +-+---++---+-+ |NODE0| | || | |NODE3| | | CPU00 | CPU03 || CPU12 | CPU15 | | | | | || | | | | Mem +--- Socket0 ---<>--- Socket3 ---+ Mem | | | | || | | | +-+ CPU01 | CPU02 || CPU13 | CPU14 | | | I/O | | || | | | +-+---^---++---^---+-+ || | Interconnect | || +-v---++---v-+ |NODE1| | || | |NODE2| | | CPU04 | CPU07 || CPU08 | CPU11 | | | | | || | | | | Mem +--- Socket1 ---<>--- Socket2 ---+ Mem | | | | || | | | +-+ CPU05 | CPU06 || CPU09 | CPU10 | | | I/O | | || | | | +-+---+---++---+---+-+ NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Interconnect" path to access resource on other nodes. These "Interconnect" hops add data access delays. It is therefore in our interest to describe the relative distances between nodes. The relative distances between nodes are described in the system's SLIT (System Locality Distance Information Table) which is part of the ACPI (Advanced Configuration and Power Interface) specification. On Linux systems the SLIT detail can be listed with help of the 'numactl -H' command. The above guest would show the following output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 node 0 size: 2007 MB node 0 free: 1931 MB node 1 cpus: 4 5 6 7 node 1 size: 1951 MB node 1 free: 1902 MB node 2 cpus: 8 9 10 11 node 2 size: 1998 MB node 2 free: 1910 MB node 3 cpus: 12 13 14 15 node 3 size: 2015 MB node 3 free: 1907 MB node distances: node 0 1 2 3 0: 10 21 31 21 1: 21 10 21 31 2: 31 21 10 21 3: 21 31 21 10 These patches extend core libvirt's XML description of NUMA cells to include NUMA distance information and propagate it to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"). The core libvirt extensions in this patch set could be used to propagate NUMA distances to qemu quests in the future. 
Wim ten Have (4): numa: describe siblings distances within cells xenconfig: add domxml conversions for xen-xl libxl: vnuma support xlconfigtest: add tests for numa cell sibling distances docs/formatdomain.html.in | 63 +++- docs/schemas/basictypes.rng| 7 + docs/schemas/cputypes.rng | 18 ++ src/conf/numa_conf.c | 328 +++- src/conf/numa_conf.h | 25 ++ src/libvirt_private.syms | 5 + src/libxl/libxl_conf.c | 119 src/libxl/libxl_domain.c | 7 +- src/xenconfig/xen_xl.c | 335 + tests/libxlxml2domconfigdata/basic-hvm.json| 95 +- tests/libxl
[libvirt] [PATCH v6 4/4] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for vnuma support administration as brought under this patch series. Added tests for the libxl_domain_config generator determining vnuma conversion for XML-2-json and json-2-XML. Signed-off-by: Wim ten Have --- Changes on v5: - Added tests for libxl_domain_config generator. - Introduced empty stubs for xc_physinfo(), xc_sharing_freed_pages(), xc_sharing_used_frames to satisfy libxl_get_physinfo() under test simulation for libxlxml2domconfigtest.c --- tests/libxlxml2domconfigdata/basic-hvm.json| 95 +- tests/libxlxml2domconfigdata/basic-hvm.xml | 66 ++- tests/virmocklibxl.c | 13 +++ .../test-fullvirt-vnuma-autocomplete.cfg | 26 ++ .../test-fullvirt-vnuma-autocomplete.xml | 85 +++ .../test-fullvirt-vnuma-nodistances.cfg| 26 ++ .../test-fullvirt-vnuma-nodistances.xml| 53 .../test-fullvirt-vnuma-partialdist.cfg| 26 ++ .../test-fullvirt-vnuma-partialdist.xml| 60 ++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 ++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 ++ tests/xlconfigtest.c | 6 ++ 12 files changed, 559 insertions(+), 4 deletions(-) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/libxlxml2domconfigdata/basic-hvm.json b/tests/libxlxml2domconfigdata/basic-hvm.json index 6fa41f34f..3a5071e14 100644 --- a/tests/libxlxml2domconfigdata/basic-hvm.json +++ b/tests/libxlxml2domconfigdata/basic-hvm.json @@ -5,17 +5,105 @@ "uuid": "2147d599-9cc6-c0dc-92ab-4064b5446e9b" }, "b_info": { -"max_vcpus": 4, +"max_vcpus": 6, "avail_vcpus": [ 0, 1, 2, -3 +3, +4, +5 +], +"vnuma_nodes": [ +{ +"memkb": 2097152, +"distances": [ +10, +21, +31, +41, +51, +61 +], +"vcpus": [ +0 +] +}, +{ +"memkb": 2097152, +"distances": [ +21, +10, +21, +31, +41, +51 +], +"vcpus": [ +1 +] +}, +{ +"memkb": 2097152, +"distances": [ +31, +21, +10, +21, +31, +41 +], +"vcpus": [ +2 +] +}, +{ +"memkb": 2097152, +"distances": [ +41, +31, +21, +10, +21, +31 +], +"vcpus": [ +3 +] +}, +{ +"memkb": 2097152, +"distances": [ +51, +41, +31, +21, +10, +21 +], +"vcpus": [ +4 +] +}, +{ +"memkb": 2097152, +"distances": [ +61, +51, +41, +31, +21, +10 +], +"vcpus": [ +5 +] +} ], "max_memkb": 1048576, "target_memkb": 1048576, "video_memkb": 8192, -"shadow_memkb&quo
[libvirt] [PATCH v6 2/4] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain on a 4 node (2 cores/socket) configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,21"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=21,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 20 for all remote instances. Signed-off-by: Wim ten Have --- Changes on v2: - Reduce the indentation level under xenParseXLVnuma(). Changes on v3: - Add the Numa core split functions required to interface. Changes on v5: - Apply suggested cosmetic suggestions. - Apply virReportError() message suggestions. - Order prototyping for Getters and Setters under numa_conf.h - Break function arguments to one parameter per line conform code style. --- src/conf/numa_conf.c | 137 +++ src/conf/numa_conf.h | 25 src/libvirt_private.syms | 5 + src/xenconfig/xen_xl.c | 335 +++ 4 files changed, 502 insertions(+) diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 5fbcc7204..7bba4120b 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -1114,6 +1114,132 @@ virDomainNumaGetNodeCount(virDomainNumaPtr numa) } +size_t +virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes) +{ +if (!nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot set an empty mem_nodes set")); +return 0; +} + +if (numa->mem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot alter an existing mem_nodes set")); +return 0; +} + +if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0) +return 0; + +numa->nmem_nodes = nmem_nodes; + +return numa->nmem_nodes; +} + +size_t +virDomainNumaGetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid) +{ +virDomainNumaDistancePtr distances = NULL; + +if (node < numa->nmem_nodes) +distances = numa->mem_nodes[node].distances; + +/* + * Present the configured distance value. If + * out of range or not available set the platform + * defined default for local and remote nodes. + */ +if (!distances || +!distances[cellid].value || +!numa->mem_nodes[node].ndistances) +return (node == cellid) ? LOCAL_DISTANCE : REMOTE_DISTANCE; + +return distances[cellid].value; +} + + +int +virDomainNumaSetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid, + unsigned int value) +{ +virDomainNumaDistancePtr distances; + +if (node >= numa->nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("Argument 'node' %zu outranges " + "defined number of NUMA nodes"), + node); +return -1; +} + +distances = numa->mem_nodes[node].distances; +if (!distances || +cellid >= numa->mem_nodes[node].ndistances) { +virReportError(VIR_ERR_XML_ERROR, "%s", + _("Arguments under memnode element do not " + "correspond with existing guest's NUMA cell")); +return -1; +} + +/* + * Advanced Configuration and Power Interface + * Specification version 6.1. Chapter 5.2.17 + * System Locality Distance Information Table + * ... Distance values of 0-9 are reserved. + */ +if (value < LOCAL_DISTANCE || +value > UNREACHABLE) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value of %d is not in valid range"), + value); +return -1; +} + +if (value == LOCAL_DISTANCE && node != cellid) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value %d und
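For readers unfamiliar with the xl.cfg syntax in the commit message above: each vnuma list entry carries one vdistances value per sibling, in sibling-id order, which is what ends up in the per-cell distances element on the XML side. The following is a sketch only, not the xen_xl.c code from this patch, showing how one such vdistances string maps onto per-sibling distance values.

#include <stdio.h>
#include <stdlib.h>

/* Split a "10,21,31,21" style string into ndist per-sibling values. */
static int parse_vdistances(const char *str, unsigned *dist, size_t ndist)
{
    char *next;
    size_t i;

    for (i = 0; i < ndist; i++) {
        dist[i] = strtoul(str, &next, 10);
        if (next == str)            /* no digits where a value was expected */
            return -1;
        str = (*next == ',') ? next + 1 : next;
    }
    return 0;
}

int main(void)
{
    unsigned dist[4];
    size_t i;

    if (parse_vdistances("10,21,31,21", dist, 4) == 0)
        for (i = 0; i < 4; i++)
            printf("sibling id=%zu value=%u\n", i, dist[i]);
    return 0;
}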
Re: [libvirt] [PATCH v5 4/4] xlconfigtest: add tests for numa cell sibling distances
On Thu, 26 Oct 2017 17:51:34 -0600 Jim Fehlig wrote: > On 10/12/2017 01:31 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Test a bidirectional xen-xl domxml to and from native for numa > > support administration as brought under this patch series. > > > > Signed-off-by: Wim ten Have > > --- > > .../test-fullvirt-vnuma-autocomplete.cfg | 26 +++ > > .../test-fullvirt-vnuma-autocomplete.xml | 85 > > ++ > > .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ > > .../test-fullvirt-vnuma-nodistances.xml| 53 ++ > > .../test-fullvirt-vnuma-partialdist.cfg| 26 +++ > > .../test-fullvirt-vnuma-partialdist.xml| 60 +++ > > tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ > > tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 > > + > > tests/xlconfigtest.c | 6 ++ > > Cool! Thanks for the various configurations to test all the hairy parsing > code :-). > > Reviewed-by: Jim Fehlig > > BTW I should have mentioned it while reviewing 3/4, but we should also have > tests for the libxl_domain_config generator, now that we have a test suite > for > that. See tests/libxlxml2domconfigtest.c. There's a nasty issue living here. Within specific test-harness you seem to get through libxl_ctx_alloc() ... ?! Running as ordinary user without privileges?! (ie. not root) if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, log) < 0) { I'd expected this libxl_ctx_alloc() to have failed with; xencall: error: Could not obtain handle on privileged command interface: Permission denied libxl: error: libxl.c:108:libxl_ctx_alloc: cannot open libxc handle: Permission denied Funny it seems to go through and the padded memory content seem not to match a real libxl_ctx_alloc(). So it is bringing some kind of ctx structure which seems _NOT_ real or at least incorrect. From I need specific context to determine host/hypervisor phys_info() ... causing my libxl_domain_config added test to die with a SEGV under specific xenlight run-time. (gdb) where #0 0x72e18f41 in xc.hypercall_buffer_alloc () from /lib64/libxenctrl.so.4.8 #1 0x72e18f98 in xc.hypercall_bounce_pre () from /lib64/libxenctrl.so.4.8 #2 0x72e0b962 in xc_physinfo () from /lib64/libxenctrl.so.4.8 #3 0x7792c0bb in libxl_get_physinfo () from /lib64/libxenlight.so.4.8 #4 0x00414012 in libxlMakeVnumaList (def=0x66f720, ctx=0x668060, d_config=0x7fffd170) at libxl/libxl_conf.c:621 #5 0x00418ca6 in libxlBuildDomainConfig (graphicsports=0x666940, def=0x66f720, channelDir=0x0, ctx=0x668060, caps=0x669ee0, d_config=0x7fffd170) at libxl/libxl_conf.c:2302 #6 0x0040f536 in testCompareXMLToDomConfig ( xmlfile=0x666860 "/home/wtenhave/WORK/libvirt/vNUMA/libvirt/tests/libxlxml2domconfigdata/basic-hvm.xml", jsonfile=0x666790 "/home/wtenhave/WORK/libvirt/vNUMA/libvirt/tests/libxlxml2domconfigdata/basic-hvm.json") at libxlxml2domconfigtest.c:88 #7 0x0040f851 in testCompareXMLToDomConfigHelper (data=0x6482f0 ) at libxlxml2domconfigtest.c:148 Should I escape the libxl_get_physinfo() under test? It is not really necessary there as whole is academic. If you have an advise please let me know ... . Regards, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
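The route eventually taken in v6 (see the 4/4 changelog earlier in this thread: empty stubs for xc_physinfo(), xc_sharing_freed_pages() and xc_sharing_used_frames() to satisfy libxl_get_physinfo() under test simulation) amounts to linking the test against no-op replacements so the physinfo query never reaches a real hypercall. A rough sketch of such stubs follows; this is not the actual tests/virmocklibxl.c code, and the argument and return types are simplified stand-ins for the real prototypes in Xen's xenctrl.h.

typedef struct xc_interface_core xc_interface;   /* stand-in for the opaque handle type */

int xc_physinfo(xc_interface *xch, void *put_info)
{
    (void)xch;
    (void)put_info;   /* leave the caller's zero-initialized struct untouched */
    return 0;         /* report success without issuing a hypercall */
}

long xc_sharing_freed_pages(xc_interface *xch)
{
    (void)xch;
    return 0;
}

long xc_sharing_used_frames(xc_interface *xch)
{
    (void)xch;
    return 0;
}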
Re: [libvirt] [PATCH v5 2/4] xenconfig: add domxml conversions for xen-xl
On Thu, 26 Oct 2017 16:46:33 -0600 Jim Fehlig wrote: > On 10/12/2017 01:31 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > This patch converts NUMA configurations between the Xen libxl > > configuration file format and libvirt's XML format. ... > > Xen xl.cfg domain configuration: > > > >vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,21"], > > ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], > > ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], > > ["pnode=3","size=2048","vcpus=6-7","vdistances=21,31,21,10"]] > > > > If there is no XML description amongst the data the > > conversion schema from xml to native will generate 10 for local and 20 > > for all remote instances. > > Does xl have the same behavior? Yes. > E.g. with > vnuma = [["pnode=0","size=2048","vcpus=0-1"], > ["pnode=1","size=2048","vcpus=2-3"], > ["pnode=2","size=2048","vcpus=4-5"], > ["pnode=3","size=2048","vcpus=6-7"]] > > will distances be 10 for local and 20 for remote nodes? Yes. > > Signed-off-by: Wim ten Have > Other than the nits, looks good. I will address all the other comments you brought up in v6. Regards, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v5 3/4] libxl: vnuma support
On Thu, 26 Oct 2017 17:40:37 -0600 Jim Fehlig wrote: > On 10/12/2017 01:31 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > This patch generates a NUMA distance-aware libxl description from the > > information extracted from a NUMA distance-aware libvirt XML file. ... > > +/* pnode */ > > +p->pnode = simulate ? 0 : i; > > So any time the number of vnuma nodes exceeds the number of physical nodes, > all > vnuma nodes are confined to physical node 0? Yes. > Does xl behave this way too? Yes. > > + > > +/* memory size */ > > +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); > > + > > +/* vcpus */ > > +bitmap = virDomainNumaGetNodeCpumask(numa, i); > > +if (bitmap == NULL) { > > +virReportError(VIR_ERR_INTERNAL_ERROR, > > + _("vnuma sibling %zu missing vcpus set"), i); > > +goto cleanup; > > +} > > + > > +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) > > +goto cleanup; > > + > > +libxl_bitmap_init(&vcpu_bitmap); > > +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { > > +virReportOOMError(); > > +goto cleanup; > > +} > > + > > +do { > > +libxl_bitmap_set(&vcpu_bitmap, cpu); > > +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); > > + > > +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); > > +libxl_bitmap_dispose(&vcpu_bitmap); > > + > > +/* vdistances */ > > +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) > > +goto cleanup; > > +p->num_distances = num_vnuma; > > + > > +for (j = 0; j < num_vnuma; j++) > > +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j); > > +} > > + > > +b_info->vnuma_nodes = vnuma_nodes; > > +b_info->num_vnuma_nodes = num_vnuma; > > + > > +ret = 0; > > + > > + cleanup: > > +if (ret) { > > +for (i = 0; i < num_vnuma; i++) { > > +libxl_vnode_info *p = &vnuma_nodes[i]; > > + > > +VIR_FREE(p->distances); > > +} > > +VIR_FREE(vnuma_nodes); > > +} > > + > > +return ret; > > +} > > +#endif > > + > > static int > > libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) > > { > > @@ -2209,6 +2325,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr > > graphicsports, > > if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) > > return -1; > > > > +#ifdef LIBXL_HAVE_VNUMA > > +if (libxlMakeVnumaList(def, ctx, d_config) < 0) > > +return -1; > > +#endif > > + > > if (libxlMakeDiskList(def, d_config) < 0) > > return -1; > > > > diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c > > index 8483d6ecf..656f4a82d 100644 > > --- a/src/libxl/libxl_driver.c > > +++ b/src/libxl/libxl_driver.c > > @@ -2788,7 +2788,8 @@ libxlDomainDefineXMLFlags(virConnectPtr conn, const > > char *xml, unsigned int flag > > virCheckFlags(VIR_DOMAIN_DEFINE_VALIDATE, NULL); > > > > if (flags & VIR_DOMAIN_DEFINE_VALIDATE) > > -parse_flags |= VIR_DOMAIN_DEF_PARSE_VALIDATE_SCHEMA; > > +parse_flags |= (VIR_DOMAIN_DEF_PARSE_VALIDATE_SCHEMA | > > +VIR_DOMAIN_DEF_PARSE_ABI_UPDATE); > > Why is this change needed? In its current form, the patch doesn't touch any > code > in the domain def post parse callback. I remove this. I added this to forcibly recompute memory size in case total_memory held a value not matching the numa cells setting. (!= 0) See virDomainDefPostParseMemory() /* Attempt to infer the initial memory size from the sum NUMA memory sizes * in case ABI updates are allowed or the element wasn't specified */ if (def->mem.total_memory == 0 || parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE || parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE_MIGRATION) numaMemory = virDomainNumaGetMemorySize(def->numa); Will address all other brought comments under v6. 
Regards, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v5 1/4] numa: describe siblings distances within cells
From: Wim ten Have Add support for describing NUMA distances in a domain's XML description. Below is an example of a 4 node setup: A defines a NUMA node. describes the NUMA distance from the to the other NUMA nodes (the s). For example, in above XML description, the distance between NUMA node0 and NUMA node2 is 31. Valid distance value are '10 <= value <= 255'. A distance value of 10 represents the distance to the node itself. A distance value of 20 represents the default value for remote nodes but other values are possible depending on the physical topology of the system. When distances are not fully described, any missing sibling distance values will default to 10 for local nodes and 20 for remote nodes. Signed-off-by: Wim ten Have --- Changes on v1: - Add changes to docs/formatdomain.html.in describing schema update. Changes on v2: - Automatically apply distance symmetry maintaining cell <-> sibling. - Check for maximum '255' on numaDistanceValue. - Automatically complete empty distance ranges. - Check that sibling_id's are in range with cell identifiers. - Allow non-contiguous ranges, starting from any node id. - Respect parameters as ATTRIBUTE_NONNULL fix functions and callers. - Add and apply topology for LOCAL_DISTANCE=10 and REMOTE_DISTANCE=20. Changes on v3 - Add UNREACHABLE if one locality is unreachable from another. - Add code cleanup aligning function naming in a separated patch. - Add numa related driver code in a separated patch. - Remove from numaDistanceValue schema/basictypes.rng - Correct doc changes. Changes on v4 - Fix symmetry error under virDomainNumaDefNodeDistanceParseXML() --- docs/formatdomain.html.in | 63 - docs/schemas/basictypes.rng | 7 ++ docs/schemas/cputypes.rng | 18 src/conf/numa_conf.c| 221 +++- 4 files changed, 305 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index c0e3c2221..ab2a89c6a 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1529,7 +1529,68 @@ - This guest NUMA specification is currently available only for QEMU/KVM. + This guest NUMA specification is currently available only for + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct + description of NUMA arranged sibling cell + distances Since 3.8.0. + + + + Under NUMA h/w architecture, distinct resources such as memory + create a designated distance between cell and + siblings that now can be described with the help of + distances. A detailed description can be found within + the ACPI (Advanced Configuration and Power Interface Specification) + within the chapter explaining the system's SLIT (System Locality + Distance Information Table). + + + +... +<cpu> + ... 
+ <numa> +<cell id='0' cpus='0,4-7' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='10'/> +<sibling id='1' value='21'/> +<sibling id='2' value='31'/> +<sibling id='3' value='41'/> + </distances> +</cell> +<cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='21'/> +<sibling id='1' value='10'/> +<sibling id='2' value='21'/> +<sibling id='3' value='31'/> + </distances> +</cell> +<cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='31'/> +<sibling id='1' value='21'/> +<sibling id='2' value='10'/> +<sibling id='3' value='21'/> + </distances> +</cell> +<cell id='3' cpus='3' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='41'/> +<sibling id='1' value='31'/> +<sibling id='2' value='21'/> +<sibling id='3' value='10
[libvirt] [PATCH v5 4/4] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for numa support administration as brought under this patch series. Signed-off-by: Wim ten Have --- .../test-fullvirt-vnuma-autocomplete.cfg | 26 +++ .../test-fullvirt-vnuma-autocomplete.xml | 85 ++ .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ .../test-fullvirt-vnuma-nodistances.xml| 53 ++ .../test-fullvirt-vnuma-partialdist.cfg| 26 +++ .../test-fullvirt-vnuma-partialdist.xml| 60 +++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 + tests/xlconfigtest.c | 6 ++ 9 files changed, 389 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg new file mode 100644 index 0..edba69a17 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 12 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0,11", "vdistances=10,21,31,41,51,61" ], [ "pnode=1", "size=2048", "vcpus=1,10", "vdistances=21,10,21,31,41,51" ], [ "pnode=2", "size=2048", "vcpus=2,9", "vdistances=31,21,10,21,31,41" ], [ "pnode=3", "size=2048", "vcpus=3,8", "vdistances=41,31,21,10,21,31" ], [ "pnode=4", "size=2048", "vcpus=4,7", "vdistances=51,41,31,21,10,21" ], [ "pnode=5", "size=2048", "vcpus=5-6", "vdistances=61,51,41,31,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml new file mode 100644 index 0..e3639eb04 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml @@ -0,0 +1,85 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 12 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg new file mode 100644 index 0..8186edfee --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = 
"destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial =
[libvirt] [PATCH v5 3/4] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 20 for remote nodes/sockets." Signed-off-by: Wim ten Have --- Changes on v1: - Fix function calling (coding) standards. - Use virReportError(...) under all failing circumstances. - Reduce redundant (format->parse) code sorting bitmaps. - Avoid non GNU shorthand notations, difficult code practice. Changes on v2: - Have autonomous defaults applied from virDomainNumaGetNodeDistance. - Automatically make Xen driver simulate vnuma pnodes on non NUMA h/w. - Compute 'memory unit=' making it the sum of all node memory. Changes on v4: - Escape virDomainNumaGetNodeCount() if effective. --- src/libxl/libxl_conf.c | 121 +++ src/libxl/libxl_driver.c | 3 +- 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 34233a955..adac6c19c 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -374,6 +374,7 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, def->features[VIR_DOMAIN_FEATURE_APIC] == VIR_TRISTATE_SWITCH_ON); libxl_defbool_set(&b_info->u.hvm.acpi, + virDomainNumaGetNodeCount(def->numa) > 0 || def->features[VIR_DOMAIN_FEATURE_ACPI] == VIR_TRISTATE_SWITCH_ON); @@ -618,6 +619,121 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList(virDomainDefPtr def, + libxl_ctx *ctx, + libxl_domain_config *d_config) +{ +int ret = -1; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +bool simulate = false; +virBitmapPtr bitmap = NULL; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libxl_get_physinfo_info failed")); +return -1; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Number of configured numa cells %zu exceeds the physical available nodes %zu, guest simulates numa"), + num_vnuma, nr_nodes); +simulate = true; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +return -1; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = simulate ? 
0 : i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +bitmap = virDomainNumaGetNodeCpumask(numa, i); +if (bitmap == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) +goto cleanup; + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +for (j = 0; j < num_vnuma; j++) +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j); +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) { +for (i = 0; i < num_vnuma; i++) { +libxl_vnode_info *p = &vnuma_nodes[i]; + +
[libvirt] [PATCH v5 2/4] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain on a 4 node (2 cores/socket) configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,21"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=21,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 20 for all remote instances. Signed-off-by: Wim ten Have --- Changes on v2: - Reduce the indentation level under xenParseXLVnuma(). Changes on v3: - Add the Numa core split functions required to interface. --- src/conf/numa_conf.c | 138 src/conf/numa_conf.h | 20 +++ src/libvirt_private.syms | 5 + src/xenconfig/xen_xl.c | 333 +++ 4 files changed, 496 insertions(+) diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 00a3343fb..b513b1de1 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -1144,6 +1144,133 @@ virDomainNumaGetNodeCount(virDomainNumaPtr numa) } +size_t +virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes) +{ +if (!nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot set an empty mem_nodes set")); +return 0; +} + +if (numa->mem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot alter an existing mem_nodes set")); +return 0; +} + +if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0) +return 0; + +numa->nmem_nodes = nmem_nodes; + +return numa->nmem_nodes; +} + +size_t +virDomainNumaGetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid) +{ +virDomainNumaDistancePtr distances = NULL; + +if (node < numa->nmem_nodes) +distances = numa->mem_nodes[node].distances; + +/* + * Present the configured distance value. If + * out of range or not available set the platform + * defined default for local and remote nodes. + */ +if (!distances || +!distances[cellid].value || +!numa->mem_nodes[node].ndistances) +return (node == cellid) ? \ + LOCAL_DISTANCE : REMOTE_DISTANCE; + +return distances[cellid].value; +} + + +int +virDomainNumaSetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid, + unsigned int value) +{ +virDomainNumaDistancePtr distances; + +if (node >= numa->nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("Argument 'node' %zu outranges " + "defined number of NUMA nodes"), + node); +return -1; +} + +distances = numa->mem_nodes[node].distances; +if (!distances || +cellid >= numa->mem_nodes[node].ndistances) { +virReportError(VIR_ERR_XML_ERROR, "%s", + _("Arguments under memnode element do not " + "correspond with existing guest's NUMA cell")); +return -1; +} + +/* + * Advanced Configuration and Power Interface + * Specification version 6.1. Chapter 5.2.17 + * System Locality Distance Information Table + * ... Distance values of 0-9 are reserved. + */ +if (value < LOCAL_DISTANCE || +value > UNREACHABLE) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value of %d is in 0-9 reserved range"), + value); +return -1; +} + +if (value == LOCAL_DISTANCE && node != cellid) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value %d under node %zu seems " + "LOCAL_DISTANCE and should be set to 10"), + value, node); +return -1; +} + +distances[cellid].cellid = cellid; +dista
[libvirt] [PATCH v5 0/4] numa: describe sibling nodes distances
From: Wim ten Have This patch extends guest domain administration adding support to advertise node sibling distances when configuring NUMA guests also referred to as vNUMA (Virtual NUMA). NUMA (Non-Uniform Memory Access), a method of configuring a cluster of nodes within a single multiprocessing system such that it shares processor local memory amongst others improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-NODE system, every socket is equipped with its own distinct memory and some with I/O. Access to memory or I/O on remote nodes is only possible through the "Interconnect". This results in different performance for local and remote resources. In contrast to NUMA we recognize the flat SMP system where no concept of local or remote resource exists. The disadvantage of high socket count SMP systems is that the shared bus can easily become a performance bottleneck under high activity. +-+---++---+-+ |NODE0| | || | |NODE3| | | CPU00 | CPU03 || CPU12 | CPU15 | | | | | || | | | | Mem +--- Socket0 ---<>--- Socket3 ---+ Mem | | | | || | | | +-+ CPU01 | CPU02 || CPU13 | CPU14 | | | I/O | | || | | | +-+---^---++---^---+-+ || | Interconnect | || +-v---++---v-+ |NODE1| | || | |NODE2| | | CPU04 | CPU07 || CPU08 | CPU11 | | | | | || | | | | Mem +--- Socket1 ---<>--- Socket2 ---+ Mem | | | | || | | | +-+ CPU05 | CPU06 || CPU09 | CPU10 | | | I/O | | || | | | +-+---+---++---+---+-+ NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Interconnect" path to access resource on other nodes. These "Interconnect" hops add data access delays. It is therefore in our interest to describe the relative distances between nodes. The relative distances between nodes are described in the system's SLIT (System Locality Distance Information Table) which is part of the ACPI (Advanced Configuration and Power Interface) specification. On Linux systems the SLIT detail can be listed with help of the 'numactl -H' command. The above guest would show the following output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 node 0 size: 2007 MB node 0 free: 1931 MB node 1 cpus: 4 5 6 7 node 1 size: 1951 MB node 1 free: 1902 MB node 2 cpus: 8 9 10 11 node 2 size: 1998 MB node 2 free: 1910 MB node 3 cpus: 12 13 14 15 node 3 size: 2015 MB node 3 free: 1907 MB node distances: node 0 1 2 3 0: 10 21 31 21 1: 21 10 21 31 2: 31 21 10 21 3: 21 31 21 10 These patches extend core libvirt's XML description of NUMA cells to include NUMA distance information and propagate it to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"). The core libvirt extensions in this patch set could be used to propagate NUMA distances to qemu quests in the future. 
Wim ten Have (4): numa: describe siblings distances within cells xenconfig: add domxml conversions for xen-xl libxl: vnuma support xlconfigtest: add tests for numa cell sibling distances docs/formatdomain.html.in | 63 +++- docs/schemas/basictypes.rng| 7 + docs/schemas/cputypes.rng | 18 ++ src/conf/numa_conf.c | 359 - src/conf/numa_conf.h | 20 ++ src/libvirt_private.syms | 5 + src/libxl/libxl_conf.c | 121 +++ src/libxl/libxl_driver.c | 3 +- src/xenconfig/xen_xl.c | 333 +++ .../test-fullvirt-vnuma-autocomplete.cfg | 26 ++ .../test-fullvirt-vnuma-autocom
Re: [libvirt] [PATCH v4 2/5] numa: describe siblings distances within cells
On Fri, 6 Oct 2017 08:49:46 -0600 Jim Fehlig wrote: > On 09/08/2017 08:47 AM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Add libvirtd NUMA cell domain administration functionality to > > describe underlying cell id sibling distances in full fashion > > when configuring HVM guests. > > May I suggest wording this paragraph as: > > Add support for describing sibling vCPU distances within a domain's vNUMA > cell > configuration. See below (v5 comment). > > Schema updates are made to docs/schemas/cputypes.rng enforcing domain > > administration to follow the syntax below the numa cell id and > > docs/schemas/basictypes.rng to add "numaDistanceValue". > > I'm not sure this paragraph is needed in the commit message. > > > A minimum value of 10 representing the LOCAL_DISTANCE as 0-9 are > > reserved values and can not be used as System Locality Distance Information. > > A value of 20 represents the default setting of REMOTE_DISTANCE > > where a maximum value of 255 represents UNREACHABLE. > > > > Effectively any cell sibling can be assigned a distance value where > > practically 'LOCAL_DISTANCE <= value <= UNREACHABLE'. > > > > [below is an example of a 4 node setup] > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > How would this look when having more than one cpu in a cell? I suppose > something > like > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Nope. That machine seems to make a 2 node vNUMA setup. Where; * NUMA node(0) defined by holds 4 (cores) cpus '0-3' with 2GByte of dedicated memory. * NUMA node(1) defined by holds 4 (cores) cpus '4-7' with 2GByte of dedicated memory. Specific configuration would typically report below when examined from within the guest domain; (despite ignorance in this example that it _DOES_ concern a single socket 8 cpu machine). [root@f25 ~]# lscpu Architecture: x86_64 CPU op-mode(s):32-bit, 64-bit Byte Order:Little Endian CPU(s):8 On-line CPU(s) list: 0-7 Thread(s) per core:1 Core(s) per socket:8 Socket(s): 1 *> NUMA node(s): 2 Vendor ID: AuthenticAMD CPU family:21 Model: 2 Model name:AMD FX-8320E Eight-Core Processor Stepping: 0 CPU MHz: 3210.862 BogoMIPS: 6421.83 Virtualization:AMD-V Hypervisor vendor: Xen Virtualization type: full L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K *> NUMA node0 CPU(s): 0-3 *> NUMA node1 CPU(s): 4-7 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm rep_good nopl cpuid extd_apicid pni pclmulqdq ssse3 fma cx16 sse4_1 sse4_2 x2apic popcnt aes xsave avx f16c hypervisor lahf_lm svm cr8_legacy abm sse4a misalignsse 3dnowprefetch ibs xop lwp fma4 tbm vmmcall bmi1 arat npt lbrv nrip_save tsc_scale vmcb_clean decodeassists pausefilter [root@f25 ~]# numactl -H available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 node 0 size: 1990 MB node 0 free: 1786 MB node 1 cpus: 4 5 6 7 node 1 size: 1950 MB node 1 free: 1820 MB node distances: node 0 1 0: 10 21 1: 21 10 > In th
[libvirt] [PATCH v1] build: isolate core libvirt libs deps from xen runtime
From: Wim ten Have Generating libvirt packages per make rpm, "with-libxl=1" and "with-xen=1", adds strict runtime dependencies per libxenlight for xen-libs package from core libvirt-libs package. This is not necessary and unfortunate since those dependencies set demand to "xen-libs" package even when there's no need for libvirt xen or libxl driver components. This patch is to have two separate xenconfig lib tool libraries: one for core libvirt (without XL), and a another that contains xl for libxl driver (libvirt_driver_libxl_impl.la) which when loading the driver, loads the remaining symbols (xen{Format,Parse}XL. For the user/sysadmin, this means the xen dependencies are moved into libxl driver, instead of core libvirt. Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- src/Makefile.am | 23 +-- src/libvirt_xenxlconfig.syms | 12 2 files changed, 13 insertions(+), 22 deletions(-) delete mode 100644 src/libvirt_xenxlconfig.syms diff --git a/src/Makefile.am b/src/Makefile.am index 173fba1e6..b74856ba7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1197,7 +1197,8 @@ XENCONFIG_SOURCES = \ xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h \ xenconfig/xen_xm.c xenconfig/xen_xm.h if WITH_LIBXL -XENCONFIG_SOURCES += \ +XENCONFIG_LIBXL_SOURCES = \ + $(XENCONFIG_SOURCES)\ xenconfig/xen_xl.c xenconfig/xen_xl.h endif WITH_LIBXL @@ -1258,10 +1259,17 @@ endif WITH_VMX if WITH_XENCONFIG noinst_LTLIBRARIES += libvirt_xenconfig.la libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la -libvirt_xenconfig_la_LIBADD = $(LIBXL_LIBS) libvirt_xenconfig_la_CFLAGS = \ - -I$(srcdir)/conf -I$(srcdir)/libxl $(AM_CFLAGS) + -I$(srcdir)/conf $(AM_CFLAGS) libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES) + +if WITH_LIBXL +noinst_LTLIBRARIES += libvirt_xenconfig_libxl.la +libvirt_xenconfig_libxl_la_LIBADD = $(LIBXL_LIBS) +libvirt_xenconfig_libxl_la_CFLAGS = \ + -I$(srcdir)/conf -I$(srcdir)/libxl $(AM_CFLAGS) +libvirt_xenconfig_libxl_la_SOURCES = $(XENCONFIG_LIBXL_SOURCES) +endif WITH_LIBXL endif WITH_XENCONFIG @@ -1417,7 +1425,7 @@ libvirt_driver_libxl_impl_la_CFLAGS = \ $(AM_CFLAGS) libvirt_driver_libxl_impl_la_LDFLAGS = $(AM_LDFLAGS) libvirt_driver_libxl_impl_la_LIBADD = $(LIBXL_LIBS) \ - libvirt_xenconfig.la \ + libvirt_xenconfig_libxl.la \ libvirt_secret.la libvirt_driver_libxl_impl_la_SOURCES = $(LIBXL_DRIVER_SOURCES) @@ -2045,6 +2053,7 @@ EXTRA_DIST += \ $(VBOX_DRIVER_EXTRA_DIST) \ $(VMWARE_DRIVER_SOURCES)\ $(XENCONFIG_SOURCES)\ + $(XENCONFIG_LIBXL_SOURCES) \ $(ACCESS_DRIVER_POLKIT_POLICY) check-local: check-augeas @@ -2228,12 +2237,6 @@ else ! WITH_XENCONFIG SYM_FILES += $(srcdir)/libvirt_xenconfig.syms endif ! WITH_XENCONFIG -if WITH_LIBXL -USED_SYM_FILES += $(srcdir)/libvirt_xenxlconfig.syms -else ! WITH_LIBXL -SYM_FILES += $(srcdir)/libvirt_xenxlconfig.syms -endif ! WITH_LIBXL - if WITH_SASL USED_SYM_FILES += $(srcdir)/libvirt_sasl.syms else ! WITH_SASL diff --git a/src/libvirt_xenxlconfig.syms b/src/libvirt_xenxlconfig.syms deleted file mode 100644 index dbe43aac7..0 --- a/src/libvirt_xenxlconfig.syms +++ /dev/null @@ -1,12 +0,0 @@ -# -# These symbols are dependent upon --with-libxl via WITH_LIBXL. -# - -#xenconfig/xen_xl.h -xenFormatXL; -xenParseXL; - -# Let emacs know we want case-insensitive sorting -# Local Variables: -# sort-fold-case: t -# End: -- 2.13.6 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC PATCH] build: isolate core libvirt libs deps from xen runtime
From: Wim ten Have Generating libvirt packages per make rpm, "with_libxl=1" and "with_xen=1", adds strict runtime dependencies per libxenlight for xen-libs package from core libvirt-libs package. This is not necessary and unfortunate since those dependencies set demand to "xen-libs" package even when there's no need for libvirt xen or libxl driver components. This patch is to have two separate xenconfig lib tool libraries: one for core libvirt (without XL), and a another that contains xl for libxl driver (libvirt_driver_libxl_impl.la) which when loading the driver, loads the remaining symbols (xen{Format,Parse}XL. For the user/sysadmin, this means the xen dependencies are moved into libxl driver, instead of core libvirt. Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- src/Makefile.am | 17 + src/libvirt_xenxlconfig.syms | 2 -- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 173fba1e6..80fa2feba 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1197,7 +1197,8 @@ XENCONFIG_SOURCES = \ xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h \ xenconfig/xen_xm.c xenconfig/xen_xm.h if WITH_LIBXL -XENCONFIG_SOURCES += \ +XENCONFIG_LIBXL_SOURCES = \ + $(XENCONFIG_SOURCES) \ xenconfig/xen_xl.c xenconfig/xen_xl.h endif WITH_LIBXL @@ -1258,10 +1259,17 @@ endif WITH_VMX if WITH_XENCONFIG noinst_LTLIBRARIES += libvirt_xenconfig.la libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la -libvirt_xenconfig_la_LIBADD = $(LIBXL_LIBS) libvirt_xenconfig_la_CFLAGS = \ - -I$(srcdir)/conf -I$(srcdir)/libxl $(AM_CFLAGS) + -I$(srcdir)/conf $(AM_CFLAGS) libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES) + +if WITH_LIBXL +noinst_LTLIBRARIES += libvirt_xenconfig_libxl.la +libvirt_xenconfig_libxl_la_LIBADD = $(LIBXL_LIBS) +libvirt_xenconfig_libxl_la_CFLAGS = \ + -I$(srcdir)/conf -I$(srcdir)/libxl $(AM_CFLAGS) +libvirt_xenconfig_libxl_la_SOURCES = $(XENCONFIG_LIBXL_SOURCES) +endif WITH_LIBXL endif WITH_XENCONFIG @@ -1417,7 +1425,7 @@ libvirt_driver_libxl_impl_la_CFLAGS = \ $(AM_CFLAGS) libvirt_driver_libxl_impl_la_LDFLAGS = $(AM_LDFLAGS) libvirt_driver_libxl_impl_la_LIBADD = $(LIBXL_LIBS) \ - libvirt_xenconfig.la \ + libvirt_xenconfig_libxl.la \ libvirt_secret.la libvirt_driver_libxl_impl_la_SOURCES = $(LIBXL_DRIVER_SOURCES) @@ -2045,6 +2053,7 @@ EXTRA_DIST += \ $(VBOX_DRIVER_EXTRA_DIST) \ $(VMWARE_DRIVER_SOURCES)\ $(XENCONFIG_SOURCES)\ + $(XENCONFIG_LIBXL_SOURCES) \ $(ACCESS_DRIVER_POLKIT_POLICY) check-local: check-augeas diff --git a/src/libvirt_xenxlconfig.syms b/src/libvirt_xenxlconfig.syms index dbe43aac7..50407002d 100644 --- a/src/libvirt_xenxlconfig.syms +++ b/src/libvirt_xenxlconfig.syms @@ -3,8 +3,6 @@ # #xenconfig/xen_xl.h -xenFormatXL; -xenParseXL; # Let emacs know we want case-insensitive sorting # Local Variables: -- 2.13.6 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v4 3/5] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,41"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=41,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 20 for all remote instances. Signed-off-by: Wim ten Have --- Changes on v2: - Reduce the indentation level under xenParseXLVnuma(). Changes on v3: - Add the Numa core split functions required to interface. --- src/conf/numa_conf.c | 138 src/conf/numa_conf.h | 20 +++ src/libvirt_private.syms | 5 + src/xenconfig/xen_xl.c | 333 +++ 4 files changed, 496 insertions(+) diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 5db4311..d30b8cc 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -1123,6 +1123,133 @@ virDomainNumaGetNodeCount(virDomainNumaPtr numa) } +size_t +virDomainNumaSetNodeCount(virDomainNumaPtr numa, size_t nmem_nodes) +{ +if (!nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot set an empty mem_nodes set")); +return 0; +} + +if (numa->mem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot alter an existing mem_nodes set")); +return 0; +} + +if (VIR_ALLOC_N(numa->mem_nodes, nmem_nodes) < 0) +return 0; + +numa->nmem_nodes = nmem_nodes; + +return numa->nmem_nodes; +} + +size_t +virDomainNumaGetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid) +{ +virDomainNumaDistancePtr distances = NULL; + +if (node < numa->nmem_nodes) +distances = numa->mem_nodes[node].distances; + +/* + * Present the configured distance value. If + * out of range or not available set the platform + * defined default for local and remote nodes. + */ +if (!distances || +!distances[cellid].value || +!numa->mem_nodes[node].ndistances) +return (node == cellid) ? \ + LOCAL_DISTANCE : REMOTE_DISTANCE; + +return distances[cellid].value; +} + + +int +virDomainNumaSetNodeDistance(virDomainNumaPtr numa, + size_t node, + size_t cellid, + unsigned int value) +{ +virDomainNumaDistancePtr distances; + +if (node >= numa->nmem_nodes) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("Argument 'node' %zu outranges " + "defined number of NUMA nodes"), + node); +return -1; +} + +distances = numa->mem_nodes[node].distances; +if (!distances || +cellid >= numa->mem_nodes[node].ndistances) { +virReportError(VIR_ERR_XML_ERROR, "%s", + _("Arguments under memnode element do not " + "correspond with existing guest's NUMA cell")); +return -1; +} + +/* + * Advanced Configuration and Power Interface + * Specification version 6.1. Chapter 5.2.17 + * System Locality Distance Information Table + * ... Distance values of 0-9 are reserved. + */ +if (value < LOCAL_DISTANCE || +value > UNREACHABLE) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value of %d is in 0-9 reserved range"), + value); +return -1; +} + +if (value == LOCAL_DISTANCE && node != cellid) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Distance value %d under node %zu seems " + "LOCAL_DISTANCE and should be set to 10"), + value, node); +return -1; +} + +distances[cellid].cellid = cellid; +distances[cellid].value = value; + +return distance
[libvirt] [PATCH v4 0/5] numa: describe sibling nodes distances
From: Wim ten Have This patch extends guest domain administration, adding support to advertise node sibling distances when configuring HVM numa guests. NUMA (non-uniform memory access) is a method of configuring a cluster of nodes within a single multiprocessing system such that each node shares its processor-local memory with the others, improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-node system, every socket is equipped with its own distinct memory. The whole typically resembles an SMP (symmetric multiprocessing) system being a "tightly-coupled," "share everything" system in which multiple processors are working under a single operating system and can access each others' memory over multiple "Bus Interconnect" paths. +-+-+-+ +-+-+-+ | M | CPU | CPU | | CPU | CPU | M | | E | | | | | | E | | M +- Socket0 -+ +- Socket3 -+ M | | O | | | | | | O | | R | CPU | CPU <-> CPU | CPU | R | | Y | | | | | | Y | +-+--^--+-+ +-+--^--+-+ | | | Bus Interconnect | | | +-+--v--+-+ +-+--v--+-+ | M | | | | | | M | | E | CPU | CPU <-> CPU | CPU | E | | M | | | | | | M | | O +- Socket1 -+ +- Socket2 -+ O | | R | | | | | | R | | Y | CPU | CPU | | CPU | CPU | Y | +-+-+-+ +-+-+-+ In contrast there is the limitation of a flat SMP system (not illustrated) under which the bus (data and address path) can easily become a performance bottleneck under high activity as sockets are added. NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Bus Interconnect" to create gateways between the sockets (nodes) enabling inter-socket access to memory. These "Bus Interconnect" hops add data access delays when a CPU (core) accesses memory associated with a remote socket (node). For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory makes them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. The XML changes made allow describing the node/sockets amongst node identifiers and propagating these towards the numa domain functionality, finally adding support to libxl. [below is an example illustrating a 4 node/socket setup] By default on libxl, if no <distances> are given to describe the distance data between different <cell>s, this patch will default to a scheme using 10 for local and 20 for any remote node/socket, which is the assumption the guest OS makes when no SLIT is specified. While SLIT is optional, libxl requires that distances are set nonetheless. On Linux systems the SLIT detail can be listed with the help of the 'numactl -H' command.
The above HVM guest would show the following output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 4 5 6 7 node 0 size: 1988 MB node 0 free: 1743 MB node 1 cpus: 1 8 9 10 12 13 14 15 node 1 size: 1946 MB node 1 free: 1885 MB node 2 cpus: 2 11 node 2 size: 2011 MB node 2 free: 1912
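For completeness, a guest of the kind just listed can be sketched in domain XML roughly as follows (memory sizes are illustrative; the cell layout and distance values mirror the formatdomain example elsewhere in this series):

  <cpu>
    ...
    <numa>
      <cell id='0' cpus='0,4-7' memory='2097152' unit='KiB'>
        <distances>
          <sibling id='0' value='10'/>
          <sibling id='1' value='21'/>
          <sibling id='2' value='31'/>
          <sibling id='3' value='41'/>
        </distances>
      </cell>
      <cell id='1' cpus='1,8-10,12-15' memory='2097152' unit='KiB'>
        <distances>
          <sibling id='0' value='21'/>
          <sibling id='1' value='10'/>
          <sibling id='2' value='21'/>
          <sibling id='3' value='31'/>
        </distances>
      </cell>
      <cell id='2' cpus='2,11' memory='2097152' unit='KiB'>
        <distances>
          <sibling id='0' value='31'/>
          <sibling id='1' value='21'/>
          <sibling id='2' value='10'/>
          <sibling id='3' value='21'/>
        </distances>
      </cell>
      <cell id='3' cpus='3' memory='2097152' unit='KiB'>
        <distances>
          <sibling id='0' value='41'/>
          <sibling id='1' value='31'/>
          <sibling id='2' value='21'/>
          <sibling id='3' value='10'/>
        </distances>
      </cell>
    </numa>
  </cpu>

With distances configured this way, the 'node distances' section of the numactl -H listing would show a matrix of 10/21/31/41 values mirrored around the diagonal.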
[libvirt] [PATCH v4 1/5] numa: rename function virDomainNumaDefCPUFormat
From: Wim ten Have This patch renames virDomainNumaDefCPUFormat() to virDomainNumaDefCPUFormatXML(), adding the XML suffix so that it matches its Parse sibling virDomainNumaDefCPUParseXML() and follows the vir*XML() function naming convention. Signed-off-by: Wim ten Have --- Changes on v3: - Cleanup by putting this change under a separate patch in series. --- src/conf/cpu_conf.c | 2 +- src/conf/numa_conf.c | 4 ++-- src/conf/numa_conf.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c index c21d11d..8d804a1 100644 --- a/src/conf/cpu_conf.c +++ b/src/conf/cpu_conf.c @@ -642,7 +642,7 @@ virCPUDefFormatBufFull(virBufferPtr buf, if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0) goto cleanup; -if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0) +if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0) goto cleanup; if (virBufferCheckError(&attributeBuf) < 0 || diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index bfd3703..b71dc01 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -801,8 +801,8 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def, int -virDomainNumaDefCPUFormat(virBufferPtr buf, - virDomainNumaPtr def) +virDomainNumaDefCPUFormatXML(virBufferPtr buf, + virDomainNumaPtr def) { virDomainMemoryAccess memAccess; char *cpustr; diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h index b6a5354..378b772 100644 --- a/src/conf/numa_conf.h +++ b/src/conf/numa_conf.h @@ -151,7 +151,7 @@ bool virDomainNumatuneNodeSpecified(virDomainNumaPtr numatune, int cellid); int virDomainNumaDefCPUParseXML(virDomainNumaPtr def, xmlXPathContextPtr ctxt); -int virDomainNumaDefCPUFormat(virBufferPtr buf, virDomainNumaPtr def); +int virDomainNumaDefCPUFormatXML(virBufferPtr buf, virDomainNumaPtr def); unsigned int virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa); -- 2.9.5 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v4 2/5] numa: describe siblings distances within cells
From: Wim ten Have Add libvirtd NUMA cell domain administration functionality to describe underlying cell id sibling distances in full fashion when configuring HVM guests. Schema updates are made to docs/schemas/cputypes.rng enforcing domain administration to follow the syntax below the numa cell id and docs/schemas/basictypes.rng to add "numaDistanceValue". A minimum value of 10 representing the LOCAL_DISTANCE as 0-9 are reserved values and can not be used as System Locality Distance Information. A value of 20 represents the default setting of REMOTE_DISTANCE where a maximum value of 255 represents UNREACHABLE. Effectively any cell sibling can be assigned a distance value where practically 'LOCAL_DISTANCE <= value <= UNREACHABLE'. [below is an example of a 4 node setup] Whenever a sibling id the cell LOCAL_DISTANCE does apply and for any sibling id not being covered a default of REMOTE_DISTANCE is used for internal computations. Signed-off-by: Wim ten Have --- Changes on v1: - Add changes to docs/formatdomain.html.in describing schema update. Changes on v2: - Automatically apply distance symmetry maintaining cell <-> sibling. - Check for maximum '255' on numaDistanceValue. - Automatically complete empty distance ranges. - Check that sibling_id's are in range with cell identifiers. - Allow non-contiguous ranges, starting from any node id. - Respect parameters as ATTRIBUTE_NONNULL fix functions and callers. - Add and apply topology for LOCAL_DISTANCE=10 and REMOTE_DISTANCE=20. Changes on v3 - Add UNREACHABLE if one locality is unreachable from another. - Add code cleanup aligning function naming in a separated patch. - Add numa related driver code in a separated patch. - Remove from numaDistanceValue schema/basictypes.rng - Correct doc changes. --- docs/formatdomain.html.in | 63 +- docs/schemas/basictypes.rng | 7 ++ docs/schemas/cputypes.rng | 18 src/conf/numa_conf.c| 200 +++- 4 files changed, 284 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 8ca7637..c453d44 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1529,7 +1529,68 @@ - This guest NUMA specification is currently available only for QEMU/KVM. + This guest NUMA specification is currently available only for + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct + description of NUMA arranged sibling cell + distances Since 3.6.0. + + + + Under NUMA h/w architecture, distinct resources such as memory + create a designated distance between cell and + siblings that now can be described with the help of + distances. A detailed description can be found within + the ACPI (Advanced Configuration and Power Interface Specification) + within the chapter explaining the system's SLIT (System Locality + Distance Information Table). + + + +... +<cpu> + ... 
+ <numa> +<cell id='0' cpus='0,4-7' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='10'/> +<sibling id='1' value='21'/> +<sibling id='2' value='31'/> +<sibling id='3' value='41'/> + </distances> +</cell> +<cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='21'/> +<sibling id='1' value='10'/> +<sibling id='2' value='21'/> +<sibling id='3' value='31'/> + </distances> +</cell> +<cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='31'/> +<sibling id='1' value='21'/> +<sibling id='2' value='10'/> +<sibling id='3' value='21'/> + </distances> +</cell> +<cell id='3' cpus='3' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='41'/> +<sibling id='1' value=&
[libvirt] [PATCH v4 4/5] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 20 for remote nodes/sockets." Signed-off-by: Wim ten Have --- Changes on v1: - Fix function calling (coding) standards. - Use virReportError(...) under all failing circumstances. - Reduce redundant (format->parse) code sorting bitmaps. - Avoid non GNU shorthand notations, difficult code practice. Changes on v2: - Have autonomous defaults applied from virDomainNumaGetNodeDistance. - Automatically make Xen driver simulate vnuma pnodes on non NUMA h/w. - Compute 'memory unit=' making it the sum of all node memory. --- src/libxl/libxl_conf.c | 120 +++ src/libxl/libxl_driver.c | 3 +- 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 4416a09..5fb3561 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -618,6 +618,121 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList(virDomainDefPtr def, + libxl_ctx *ctx, + libxl_domain_config *d_config) +{ +int ret = -1; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +bool simulate = false; +virBitmapPtr bitmap = NULL; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libxl_get_physinfo_info failed")); +return -1; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Number of configured numa cells %zu exceeds the physical available nodes %zu, guest simulates numa"), + num_vnuma, nr_nodes); +simulate = true; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +return -1; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = simulate ? 
0 : i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +bitmap = virDomainNumaGetNodeCpumask(numa, i); +if (bitmap == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) +goto cleanup; + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +for (j = 0; j < num_vnuma; j++) +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j); +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) { +for (i = 0; i < num_vnuma; i++) { +libxl_vnode_info *p = &vnuma_nodes[i]; + +VIR_FREE(p->distances); +} +VIR_FREE(vnuma_nodes); +} + +return ret; +} +#endif + static int libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) { @@ -2208,6 +2323,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) return -1; +#ifdef LIBXL_HAVE_VNUMA +if (libxlMakeVnumaList(def, ctx, d_config) < 0) +return -1; +#endif + if (libxlMakeDiskList(def, d_config) <
[libvirt] [PATCH v4 5/5] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for numa support administration as brought under this patch series. Signed-off-by: Wim ten Have --- .../test-fullvirt-vnuma-autocomplete.cfg | 26 +++ .../test-fullvirt-vnuma-autocomplete.xml | 85 ++ .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ .../test-fullvirt-vnuma-nodistances.xml| 53 ++ .../test-fullvirt-vnuma-partialdist.cfg| 26 +++ .../test-fullvirt-vnuma-partialdist.xml| 60 +++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 + tests/xlconfigtest.c | 6 ++ 9 files changed, 389 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-partialdist.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg new file mode 100644 index 000..edba69a --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 12 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0,11", "vdistances=10,21,31,41,51,61" ], [ "pnode=1", "size=2048", "vcpus=1,10", "vdistances=21,10,21,31,41,51" ], [ "pnode=2", "size=2048", "vcpus=2,9", "vdistances=31,21,10,21,31,41" ], [ "pnode=3", "size=2048", "vcpus=3,8", "vdistances=41,31,21,10,21,31" ], [ "pnode=4", "size=2048", "vcpus=4,7", "vdistances=51,41,31,21,10,21" ], [ "pnode=5", "size=2048", "vcpus=5-6", "vdistances=61,51,41,31,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml new file mode 100644 index 000..e3639eb --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-autocomplete.xml @@ -0,0 +1,85 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 12 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg new file mode 100644 index 000..8186edf --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = 
"destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial =
Re: [libvirt] [PATCH v3 1/4] numa: describe siblings distances within cells
On Mon, 4 Sep 2017 08:49:33 +0200 Martin Kletzander wrote: > On Fri, Sep 01, 2017 at 04:31:50PM +0200, Wim ten Have wrote: > >On Thu, 31 Aug 2017 16:36:58 +0200 > >Martin Kletzander wrote: > > > >> On Thu, Aug 31, 2017 at 04:02:38PM +0200, Wim Ten Have wrote: > >> >From: Wim ten Have > >> > > > > >> I haven't seen the previous versions, so sorry if I point out something > >> that got changed already. > > > >There was a v1 and v2 cycle by Jim and Daniel 2 month back. > >Due to personal issues whole got delayed at my end where i am > >catching up. > > > >> >Add libvirtd NUMA cell domain administration functionality to > >> >describe underlying cell id sibling distances in full fashion > >> >when configuring HVM guests. > >> > > >> >[below is an example of a 4 node setup] > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > > >> > >> Are all those required? I don't see the point in requiring duplicate > >> values. > > > >Sorry I am not sure I understand what you mean by "duplicate values" > >here. Can you elaborate? > > > > Sure. For simplicity let's assume 2 cells: > > > > > > > > > > > > > The fields are not a necessity. And to reduce even more we could also remove LOCAL_DISTANCES as they make a constant factor where; (cell_id == sibling_id) So, although the presentation of a guest domain may heavily, giving this a look it seems far from attractive to me. I am not sure if this is what we want and should do. Let me go forward sending out "PATCH v4" addressing other comments and reserve this idea for "PATCH v5". Regards, - Wim. > >> >Changes under this commit concern all those that require adding > >> >the valid data structures, virDomainNuma* functional routines and the > >> >XML doc schema enhancements to enforce appropriate administration. > >> > >> I don't understand the point of this paragraph. > > > >Let me rephrase this paragraph in future version. > > > >> >These changes are accompanied with topic related documentation under > >> >docs/formatdomain.html within the "CPU model and topology" extending the > >> >"Guest NUMA topology" paragraph. > >> > >> This can be seen from the patch. > > > >Agree, so I'll address this in future version too. > > > >> >For terminology we refer to sockets as "nodes" where access to each > >> >others' distinct resources such as memory make them "siblings" with a > >> >designated "distance" between them. A specific design is described under > >> >the ACPI (Advanced Configuration and Power Interface Specification) > >> >within the chapter explaining the system's SLIT (System Locality Distance > >> >Information Table). > >> > >> The above paragraph is also being added in the docs. > > > >True so I'll scratch this part of the commit message. > > > >> >These patches extend core libvirt's XML description of a virtual machine's > >> >hardware to include NUMA distance information for sibling nodes, which > >> >is then passed to Xen guests via libxl. Recently qemu landed support for > >> >constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA > >> >distance for different NUMA nodes"), hence these core libvirt extensions > >> >can also help other drivers in supporting this feature. > >> > >> This is not true, libxl changes are i
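In essence the question is whether every direction needs to be spelled out. Given the automatic symmetry and the completion of empty distance ranges listed in the v2 changelog, a reduced two-cell description along these lines (values illustrative) should carry the same information as the fully spelled-out form, with libvirt filling in the local distance (10) and the mirrored sibling entry itself:

  <numa>
    <cell id='0' cpus='0-3' memory='2097152' unit='KiB'>
      <distances>
        <sibling id='1' value='21'/>
      </distances>
    </cell>
    <cell id='1' cpus='4-7' memory='2097152' unit='KiB'/>
  </numa>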
Re: [libvirt] [PATCH v3 1/4] numa: describe siblings distances within cells
On Mon, 4 Sep 2017 08:49:33 +0200 Martin Kletzander wrote: > On Fri, Sep 01, 2017 at 04:31:50PM +0200, Wim ten Have wrote: > >On Thu, 31 Aug 2017 16:36:58 +0200 > >Martin Kletzander wrote: > > > >> >diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c > >> >index c21d11d..8d804a1 100644 > >> >--- a/src/conf/cpu_conf.c > >> >+++ b/src/conf/cpu_conf.c > >> >@@ -642,7 +642,7 @@ virCPUDefFormatBufFull(virBufferPtr buf, > >> > if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0) > >> > goto cleanup; > >> > > >> >-if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0) > >> >+if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0) > >> > goto cleanup; > > > >> Changing function names should be separate patch. Why is this > >> changed anyway? > > > >I renamed virDomainNumaDefCPUFormat() to virDomainNumaDefCPUFormatXML() > >to make it consistent with already existing function names like > >virDomainNumaDefCPUParseXML() > > > > Then put it in a separate patch. Sure. Do you advise me to put this patch in the same series or in a separate set? - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH v3 1/4] numa: describe siblings distances within cells
On Thu, 31 Aug 2017 16:36:58 +0200 Martin Kletzander wrote: > On Thu, Aug 31, 2017 at 04:02:38PM +0200, Wim Ten Have wrote: > >From: Wim ten Have > > > I haven't seen the previous versions, so sorry if I point out something > that got changed already. There was a v1 and v2 cycle by Jim and Daniel 2 month back. Due to personal issues whole got delayed at my end where i am catching up. > >Add libvirtd NUMA cell domain administration functionality to > >describe underlying cell id sibling distances in full fashion > >when configuring HVM guests. > > > >[below is an example of a 4 node setup] > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Are all those required? I don't see the point in requiring duplicate > values. Sorry I am not sure I understand what you mean by "duplicate values" here. Can you elaborate? > >Changes under this commit concern all those that require adding > >the valid data structures, virDomainNuma* functional routines and the > >XML doc schema enhancements to enforce appropriate administration. > > I don't understand the point of this paragraph. Let me rephrase this paragraph in future version. > >These changes are accompanied with topic related documentation under > >docs/formatdomain.html within the "CPU model and topology" extending the > >"Guest NUMA topology" paragraph. > > This can be seen from the patch. Agree, so I'll address this in future version too. > >For terminology we refer to sockets as "nodes" where access to each > >others' distinct resources such as memory make them "siblings" with a > >designated "distance" between them. A specific design is described under > >the ACPI (Advanced Configuration and Power Interface Specification) > >within the chapter explaining the system's SLIT (System Locality Distance > >Information Table). > > The above paragraph is also being added in the docs. True so I'll scratch this part of the commit message. > >These patches extend core libvirt's XML description of a virtual machine's > >hardware to include NUMA distance information for sibling nodes, which > >is then passed to Xen guests via libxl. Recently qemu landed support for > >constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA > >distance for different NUMA nodes"), hence these core libvirt extensions > >can also help other drivers in supporting this feature. > > This is not true, libxl changes are in separate patch. Like the previous comments and replies, I'll rearrange the commit messages and rephrase them to be more specific to the patch. > There's a lot of copy-paste from the cover-letter... There is indeed room for improvement. > >These changes alter the docs/schemas/cputypes.rng enforcing > >domain administration to follow the syntax below per numa cell id. > > > >These changes also alter docs/schemas/basictypes.rng to add > >"numaDistanceValue" which is an "unsignedInt" with a minimum value > >of 10 as 0-9 are reserved values and can not be used as System > >Locality Distance Information Table data. > > > >Signed-off-by: Wim ten Have > >--- > >Changes on v1: > >- Add changes to docs/formatdomain.html.in describing schema update. > >Changes on v2: > >- Automatically apply distance symmetry maintaining cell <-> sibling. > >- Check for maximum '255' on numaDistanceValue. > >- Automatically complete empty distance ranges. > >- Check that sibling_id's are in range with cell identifiers. 
> >- Allow non-contiguous ranges, starting from any node id. > >- Respect parameters as ATTRIBUTE_NONNULL fix functions and callers. > >- Add and apply topoly for LOCAL_DISTANCE=10 and REMOTE_DISTANCE=20. > >--- > > docs/formatdomain.html.in | 70 +- > > docs/schemas/basictypes.rng | 9 ++ > > docs/schemas/cputypes.rng | 18 +++ > > src/conf/cpu_conf.c | 2 +- > > src/conf/numa_conf.c| 323 > > +++
[libvirt] [PATCH v3 4/4] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for numa support administration as brought under this patch series. Signed-off-by: Wim ten Have --- .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ .../test-fullvirt-vnuma-nodistances.xml| 53 ++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 ++ tests/xlconfigtest.c | 4 ++ 5 files changed, 190 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg new file mode 100644 index 000..8186edf --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,20,20,20" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=20,10,20,20" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=20,20,10,20" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=20,20,20,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml new file mode 100644 index 000..9cab3ca --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml @@ -0,0 +1,53 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.cfg b/tests/xlconfigdata/test-fullvirt-vnuma.cfg new file mode 100644 index 000..91e233a --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,21,31,41" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=21,10,21,31" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=31,21,10,21" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=41,31,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.xml 
b/tests/xlconfigdata/test-fullvirt-vnuma.xml new file mode 100644 index 000..5368b0d --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.xml @@ -0,0 +1,81 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
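Each vnuma entry in the .cfg files above maps one-to-one onto a <numa> cell in the corresponding XML test file; for example the first entry of test-fullvirt-vnuma.cfg corresponds roughly to the following cell (a sketch; the XML files added by the patch carry the complete description):

  <cell id='0' cpus='0-1' memory='2097152' unit='KiB'>
    <distances>
      <sibling id='0' value='10'/>
      <sibling id='1' value='21'/>
      <sibling id='2' value='31'/>
      <sibling id='3' value='41'/>
    </distances>
  </cell>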
[libvirt] [PATCH v3 3/4] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,41"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=41,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 21 for all remote instances. Signed-off-by: Wim ten Have --- Changes on v2: - Reduce the indentation level under xenParseXLVnuma(). --- src/xenconfig/xen_xl.c | 333 + 1 file changed, 333 insertions(+) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index d168d3f..b72ada9 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -309,6 +309,205 @@ xenParseXLSpice(virConfPtr conf, virDomainDefPtr def) return -1; } +#ifdef LIBXL_HAVE_VNUMA +static int +xenParseXLVnuma(virConfPtr conf, virDomainDefPtr def) +{ +int ret = -1; +char *tmp = NULL; +char **token = NULL; +size_t vcpus = 0; +size_t nr_nodes = 0; +size_t vnodeCnt = 0; +virCPUDefPtr cpu = NULL; +virConfValuePtr list; +virConfValuePtr vnode; +virDomainNumaPtr numa; + +numa = def->numa; +if (numa == NULL) +return -1; + +list = virConfGetValue(conf, "vnuma"); +if (!list || list->type != VIR_CONF_LIST) +return 0; + +vnode = list->list; +while (vnode && vnode->type == VIR_CONF_LIST) { +vnode = vnode->next; +nr_nodes++; +} + +if (!virDomainNumaSetNodeCount(numa, nr_nodes)) +goto cleanup; + +if (VIR_ALLOC(cpu) < 0) +goto cleanup; + +list = list->list; +while (list) { +int pnode = -1; +virBitmapPtr cpumask = NULL; +unsigned long long kbsize = 0; + +/* Is there a sublist (vnode)? */ +if (list && list->type == VIR_CONF_LIST) { +vnode = list->list; + +while (vnode && vnode->type == VIR_CONF_STRING) { +const char *data; +const char *str = vnode->str; + +if (!str || + !(data = strrchr(str, '='))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode invalid format '%s'"), + str); +goto cleanup; +} +data++; + +if (*data) { +size_t len; +char vtoken[64]; + +if (STRPREFIX(str, "pnode")) { +unsigned int cellid; + +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu pnode '%s' too long for destination"), + vnodeCnt, data); +goto cleanup; +} + +if ((virStrToLong_ui(vtoken, NULL, 10, &cellid) < 0) || +(cellid >= nr_nodes)) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("vnuma vnode %zu invalid value pnode '%s'"), + vnodeCnt, data); +goto cleanup; +} +pnode = cellid; +} else if (STRPREFIX(str, "size")) { +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu size '%s' too long for destination"), + vnodeCnt, data); +goto cle
[libvirt] [PATCH v3 0/4] numa: describe sibling nodes distances
From: Wim ten Have This patch extents guest domain administration adding support to advertise node sibling distances when configuring HVM numa guests. NUMA (non-uniform memory access), a method of configuring a cluster of nodes within a single multiprocessing system such that it shares processor local memory amongst others improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-node system, every socket is equipped with its own distinct memory. The whole typically resembles a SMP (symmetric multiprocessing) system being a "tightly-coupled," "share everything" system in which multiple processors are working under a single operating system and can access each others' memory over multiple "Bus Interconnect" paths. +-+-+-+ +-+-+-+ | M | CPU | CPU | | CPU | CPU | M | | E | | | | | | E | | M +- Socket0 -+ +- Socket3 -+ M | | O | | | | | | O | | R | CPU | CPU <-> CPU | CPU | R | | Y | | | | | | Y | +-+--^--+-+ +-+--^--+-+ | | | Bus Interconnect | | | +-+--v--+-+ +-+--v--+-+ | M | | | | | | M | | E | CPU | CPU <-> CPU | CPU | E | | M | | | | | | M | | O +- Socket1 -+ +- Socket2 -+ O | | R | | | | | | R | | Y | CPU | CPU | | CPU | CPU | Y | +-+-+-+ +-+-+-+ In contrast there is the limitation of a flat SMP system, not illustrated. Here, as sockets are added, the bus (data and address path), under high activity, gets overloaded and easily becomes a performance bottleneck. NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Bus Interconnect" to create gateways between the sockets (nodes) enabling inter-socket access to memory. These "Bus Interconnect" hops add data access delays when a CPU (core) accesses memory associated with a remote socket (node). For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory make them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. The XML changes made allow to describe the (or node/sockets) amongst node identifiers and propagate these towards the numa domain functionality finally adding support to libxl. [below is an example illustrating a 4 node/socket setup] By default on libxl, if no are given to describe the SLIT data between different s, this patch will default to a scheme using 10 for local and 21 for any remote node/socket, which is the assumption of guest OS when no SLIT is specified. While SLIT is optional, libxl requires that distances are set nonetheless. On Linux systems the SLIT detail can be listed with help of the 'numactl -H' command. 
The above HVM guest, as described, would show the following output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 4 5 6 7 node 0 size: 1988 MB node 0 free: 1743 MB node 1 cpus: 1 8 9 10 12 13 14 15 node 1 size: 1946 MB node 1 free: 1885 MB node 2 cpus: 2 11 node 2 siz
[libvirt] [PATCH v3 2/4] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 21 for remote nodes/sockets." Signed-off-by: Wim ten Have --- Changes on v1: - Fix function calling (coding) standards. - Use virReportError(...) under all failing circumstances. - Reduce redundant (format->parse) code sorting bitmaps. - Avoid non GNU shorthand notations, difficult code practise. Changes on v2: - Have autonomous defaults applied from virDomainNumaGetNodeDistance. - Automatically make Xen driver simulate vnuma pnodes on non NUMA h/w. - Compute 'memory unit=' making it the sum of all node memory. --- src/libxl/libxl_conf.c | 120 +++ src/libxl/libxl_driver.c | 3 +- 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 4416a09..5fb3561 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -618,6 +618,121 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList(virDomainDefPtr def, + libxl_ctx *ctx, + libxl_domain_config *d_config) +{ +int ret = -1; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +bool simulate = false; +virBitmapPtr bitmap = NULL; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libxl_get_physinfo_info failed")); +return -1; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Number of configured numa cells %zu exceeds the physical available nodes %zu, guest simulates numa"), + num_vnuma, nr_nodes); +simulate = true; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +return -1; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = simulate ? 
0 : i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +bitmap = virDomainNumaGetNodeCpumask(numa, i); +if (bitmap == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) +goto cleanup; + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +for (j = 0; j < num_vnuma; j++) +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j); +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) { +for (i = 0; i < num_vnuma; i++) { +libxl_vnode_info *p = &vnuma_nodes[i]; + +VIR_FREE(p->distances); +} +VIR_FREE(vnuma_nodes); +} + +return ret; +} +#endif + static int libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) { @@ -2208,6 +2323,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) return -1; +#ifdef LIBXL_HAVE_VNUMA +if (libxlMakeVnumaList(def, ctx, d_config) < 0) +return -1; +#endif + if (libxlMakeDiskList(def, d_config) <
[libvirt] [PATCH v3 1/4] numa: describe siblings distances within cells
From: Wim ten Have Add libvirtd NUMA cell domain administration functionality to describe underlying cell id sibling distances in full fashion when configuring HVM guests. [below is an example of a 4 node setup] Changes under this commit concern all those that require adding the valid data structures, virDomainNuma* functional routines and the XML doc schema enhancements to enforce appropriate administration. These changes are accompanied with topic related documentation under docs/formatdomain.html within the "CPU model and topology" extending the "Guest NUMA topology" paragraph. For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory make them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. These changes alter the docs/schemas/cputypes.rng enforcing domain administration to follow the syntax below per numa cell id. These changes also alter docs/schemas/basictypes.rng to add "numaDistanceValue" which is an "unsignedInt" with a minimum value of 10 as 0-9 are reserved values and can not be used as System Locality Distance Information Table data. Signed-off-by: Wim ten Have --- Changes on v1: - Add changes to docs/formatdomain.html.in describing schema update. Changes on v2: - Automatically apply distance symmetry maintaining cell <-> sibling. - Check for maximum '255' on numaDistanceValue. - Automatically complete empty distance ranges. - Check that sibling_id's are in range with cell identifiers. - Allow non-contiguous ranges, starting from any node id. - Respect parameters as ATTRIBUTE_NONNULL fix functions and callers. - Add and apply topoly for LOCAL_DISTANCE=10 and REMOTE_DISTANCE=20. --- docs/formatdomain.html.in | 70 +- docs/schemas/basictypes.rng | 9 ++ docs/schemas/cputypes.rng | 18 +++ src/conf/cpu_conf.c | 2 +- src/conf/numa_conf.c| 323 +++- src/conf/numa_conf.h| 25 +++- src/libvirt_private.syms| 6 + 7 files changed, 444 insertions(+), 9 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 8ca7637..19a2ac7 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1529,7 +1529,75 @@ - This guest NUMA specification is currently available only for QEMU/KVM. + This guest NUMA specification is currently available only for + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct + description of NUMA arranged sibling cell + distances Since 3.6.0. + + + + Under NUMA h/w architecture, distinct resources such as memory + create a designated distance between cell and + siblings that now can be described with the help of + distances. A detailed describtion can be found within + the ACPI (Advanced Configuration and Power Interface Specification) + within the chapter explaining the system's SLIT (System Locality + Distance Information Table). + + + +... +<cpu> + ... 
+ <numa> +<cell id='0' cpus='0,4-7' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='10'/> +<sibling id='1' value='21'/> +<sibling id='2' value='31'/> +<sibling id='3' value='41'/> + </distances> +</cell> +<cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='21'/> +<sibling id='1' value='10'/> +<sibling id='2' value='21'/> +<sibling id='3' value='31'/> + </distances> +
Re: [libvirt] [PATCH v1] numa: compute and set matching vcpus for numa domains
On Thu, 20 Jul 2017 11:10:31 +0100 "Daniel P. Berrange" wrote: > On Thu, Jul 20, 2017 at 11:29:26AM +0200, Wim Ten Have wrote: > > From: Wim ten Have > > > > The QEMU driver can erroneously allocate more vpus to a domain > > than there are cpus in the domain if the element is used > > to describe element topology. Fix this by calculating > > the number of cpus described in the element of a > > element and comparing it to the number of vcpus. If the number > > of vcpus is greater than the number of cpus, cap the number of > > vcpus to the number of cpus. > > > > This patch also adds a small libvirt documentation update under > > the "CPU Allocation" section. > > > > Signed-off-by: Wim ten Have > > --- > > docs/formatdomain.html.in | 9 - > > src/conf/domain_conf.c| 14 +++--- > > 2 files changed, 19 insertions(+), 4 deletions(-) > > > > diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in > > index 07208ee..3c85307 100644 > > --- a/docs/formatdomain.html.in > > +++ b/docs/formatdomain.html.in > > @@ -501,7 +501,14 @@ > >vcpu > >The content of this element defines the maximum number of virtual > > CPUs allocated for the guest OS, which must be between 1 and > > -the maximum supported by the hypervisor. > > +the maximum supported by the hypervisor. If a numa > > +element is contained within the cpu element, the > > +number of virtual CPUs to be allocated is compared with the sum > > +of the cpus attributes across the cell > > +elements within the numa element. If the number of > > +virtual CPUs is greater than the sum of the cpus > > +attributes, the number of virtual CPUs is capped at sum of the > > +cpus attributes. > > > > cpuset > > > > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > > index 958a5b7..73d7f4f 100644 > > --- a/src/conf/domain_conf.c > > +++ b/src/conf/domain_conf.c > > @@ -3844,12 +3844,20 @@ virDomainDefPostParseMemory(virDomainDefPtr def, > > unsigned long long numaMemory = 0; > > unsigned long long hotplugMemory = 0; > > > > -/* Attempt to infer the initial memory size from the sum NUMA memory > > sizes > > - * in case ABI updates are allowed or the element wasn't > > specified */ > > +/* Attempt to infer the initial memory size from the sum NUMA memory > > + * sizes in case ABI updates are allowed or the element > > + * wasn't specified. Also cap the vcpu count to the sum of NUMA cpus > > + * allocated for all nodes. */ > > if (def->mem.total_memory == 0 || > > parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE || > > -parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE_MIGRATION) > > +parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE_MIGRATION) { > > +unsigned int numaVcpus = 0; > > + > > +if ((numaVcpus = virDomainNumaGetCPUCountTotal(def->numa))) > > +virDomainDefSetVcpus(def, numaVcpus); > > + > > numaMemory = virDomainNumaGetMemorySize(def->numa); > > +} > > AFAICT, this scenario is simply a user configuration mistake, and so we > should report an error message to them to fix it. Not to push but I think this is the correct way ... O:-). Ok, perhaps the documentation note/commit message should be slightly rewritten aswell as the altered comment on specific code section. The change is _NOT_ changing final guest provided 'maxvcpus' but 'vcpus' instead. This means that it adds the "current='#count'" under the vcpu element if such is less then vcpu 'maxvcpus' provided count. If equal there is no issue and if larger there is indeed a correct error message (exceptional condition). 
Imagine a simple domain description _WITHOUT_ a 'current' vcpu count and with a <numa> element describing 4 cpus (see the sketch after this message); <vcpu>16</vcpu> This nicely creates a guest with 4 domain CPUs added, where the administrator can "hot-add" an additional 12 CPUs making the full 'maxvcpus' defined 16. "hot-add" by virsh; virsh # setvcpus kvm26 16 Without the fix, under the former domain description libvirt would bring the whole '16' vcpus to the guest, and if there was a current value given, it would online all given by 'current' on top of the 16 .. With the fix, its post configuration action will now nicely rewrite the element as shown below; <vcpu current='4'>16</vcpu> ..
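Putting that example together, the configuration and post-parse result being described amount to roughly this (memory sizes and cell layout are illustrative):

  As configured, without a 'current' attribute:

    <vcpu placement='static'>16</vcpu>
    <cpu>
      <numa>
        <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/>
        <cell id='1' cpus='2-3' memory='1048576' unit='KiB'/>
      </numa>
    </cpu>

  After the post-parse fixup the vcpu element is rewritten as:

    <vcpu placement='static' current='4'>16</vcpu>

  so the guest starts with the 4 CPUs described by <numa>, and the remaining 12 stay available for hot-add (virsh setvcpus) up to the maximum of 16.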
[libvirt] [PATCH v1] numa: compute and set matching vcpus for numa domains
From: Wim ten Have The QEMU driver can erroneously allocate more vcpus to a domain than there are cpus in the domain if the <numa> element is used to describe the <cpu> element topology. Wim ten Have (1): numa: compute and set matching vcpus for numa domains docs/formatdomain.html.in | 9 - src/conf/domain_conf.c| 14 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1] numa: compute and set matching vcpus for numa domains
From: Wim ten Have The QEMU driver can erroneously allocate more vcpus to a domain than there are cpus in the domain if the <numa> element is used to describe the <cpu> element topology. Fix this by calculating the number of cpus described in the <cell> elements of a <numa> element and comparing it to the number of vcpus. If the number of vcpus is greater than the number of cpus, cap the number of vcpus to the number of cpus. This patch also adds a small libvirt documentation update under the "CPU Allocation" section. Signed-off-by: Wim ten Have --- docs/formatdomain.html.in | 9 - src/conf/domain_conf.c| 14 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 07208ee..3c85307 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -501,7 +501,14 @@ vcpu The content of this element defines the maximum number of virtual CPUs allocated for the guest OS, which must be between 1 and -the maximum supported by the hypervisor. +the maximum supported by the hypervisor. If a numa +element is contained within the cpu element, the +number of virtual CPUs to be allocated is compared with the sum +of the cpus attributes across the cell +elements within the numa element. If the number of +virtual CPUs is greater than the sum of the cpus +attributes, the number of virtual CPUs is capped at sum of the +cpus attributes. cpuset diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 958a5b7..73d7f4f 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -3844,12 +3844,20 @@ virDomainDefPostParseMemory(virDomainDefPtr def, unsigned long long numaMemory = 0; unsigned long long hotplugMemory = 0; -/* Attempt to infer the initial memory size from the sum NUMA memory sizes - * in case ABI updates are allowed or the element wasn't specified */ +/* Attempt to infer the initial memory size from the sum NUMA memory + * sizes in case ABI updates are allowed or the element + * wasn't specified. Also cap the vcpu count to the sum of NUMA cpus + * allocated for all nodes. */ if (def->mem.total_memory == 0 || parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE || -parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE_MIGRATION) +parseFlags & VIR_DOMAIN_DEF_PARSE_ABI_UPDATE_MIGRATION) { +unsigned int numaVcpus = 0; + +if ((numaVcpus = virDomainNumaGetCPUCountTotal(def->numa))) +virDomainDefSetVcpus(def, numaVcpus); + numaMemory = virDomainNumaGetMemorySize(def->numa); +} /* calculate the sizes of hotplug memory */ for (i = 0; i < def->nmems; i++) -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFC PATCH v2 1/4] numa: describe siblings distances within cells
On Wed, 28 Jun 2017 15:21:29 +0100 "Daniel P. Berrange" wrote: > On Wed, Jun 28, 2017 at 03:56:36PM +0200, Wim Ten Have wrote: > > From: Wim ten Have > > > > Add libvirtd NUMA cell domain administration functionality to > > describe underlying cell id sibling distances in full fashion > > when configuring HVM guests. > > > > [below is an example of a 4 node setup] Hi Daniel, I think you're the best one to consult on the issue below too, given it is related to the changes I am preparing but was obviously not yet discussed. Your other hint towards symmetry on distance configuration and autonomous actions made me approach the whole more carefully. While working on support, specifically for the xenlight driver, I consult QEMU/KVM behavior for certain topic-related details. Here, when describing a guest XML configuration in libvirt, there's this nuance when <numa> comes into the game. Imagine a description with <vcpu>8</vcpu> ... Running QEMU/KVM with such a configuration onlines 5 CPUs. This is wrong IMHO. lscpu Architecture:x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 5 On-line CPU(s) list: 0-4 ... NUMA node(s):2 ... NUMA node0 CPU(s): 0,2,4 NUMA node1 CPU(s): 1,3 My thinking is that libvirt should correct this in the virDomainDefPostParse() path and forcibly put the count to '4', i.e. <vcpu current='4'>8</vcpu>. This is of course a related issue. Is it worth fixing this (QEMU/KVM) case? If doing that, would that potentially hurt specific uses and that way libvirt? - Wim. -- Wim ten Have | Consulting Member of Technical Staff Oracle Linux and VM Development Engineering ORACLE Nederland BV | Hertogswetering 163-167 | 3543 AS Utrecht/NL -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFC PATCH v2 1/4] numa: describe siblings distances within cells
On Wed, 28 Jun 2017 15:21:29 +0100 "Daniel P. Berrange" wrote: Hi Daniel, Appreciate your input and comments. Let me work on package and return asap. One quick question; For future i am looking into way to somehow auto- tune node (cell/socket/cpu) assignment under directive. Such in a way brings intelligence into libvirt and therefor not wanted. Perhaps this is up to s/w stack utilizing guests per libvirt. If there are thoughts on such matter then i am very interested to hear about them. > On Wed, Jun 28, 2017 at 03:56:36PM +0200, Wim Ten Have wrote: > > From: Wim ten Have > > > > Add libvirtd NUMA cell domain administration functionality to > > describe underlying cell id sibling distances in full fashion > > when configuring HVM guests. > > > > [below is an example of a 4 node setup] > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Changes under this commit concern all those that require adding > > the valid data structures, virDomainNuma* functional routines and the > > XML doc schema enhancements to enforce appropriate administration. > > > > These changes are accompanied with topic related documentation under > > docs/formatdomain.html within the "CPU model and topology" extending the > > "Guest NUMA topology" paragraph. > > > > For terminology we refer to sockets as "nodes" where access to each > > others' distinct resources such as memory make them "siblings" with a > > designated "distance" between them. A specific design is described under > > the ACPI (Advanced Configuration and Power Interface Specification) > > within the chapter explaining the system's SLIT (System Locality Distance > > Information Table). > > > > These patches extend core libvirt's XML description of a virtual machine's > > hardware to include NUMA distance information for sibling nodes, which > > is then passed to Xen guests via libxl. Recently qemu landed support for > > constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA > > distance for different NUMA nodes"), hence these core libvirt extensions > > can also help other drivers in supporting this feature. > > > > These changes alter the docs/schemas/cputypes.rng enforcing > > domain administration to follow the syntax below per numa cell id. > > > > These changes also alter docs/schemas/basictypes.rng to add > > "numaDistanceValue" which is an "unsignedInt" with a minimum value > > of 10 as 0-9 are reserved values and can not be used as System > > Locality Distance Information Table data. > > > > Signed-off-by: Wim ten Have > > --- > > docs/formatdomain.html.in | 64 ++- > > docs/schemas/basictypes.rng | 8 ++ > > docs/schemas/cputypes.rng | 18 +++ > > src/conf/cpu_conf.c | 2 +- > > src/conf/numa_conf.c| 260 > > +++- > > src/conf/numa_conf.h| 25 - > > src/libvirt_private.syms| 6 + > > 7 files changed, 376 insertions(+), 7 deletions(-) > > > > diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in > > index 36bea67..b9736e3 100644 > > --- a/docs/formatdomain.html.in > > +++ b/docs/formatdomain.html.in > > @@ -1510,7 +1510,69 @@ > > > > > > > > - This guest NUMA specification is currently available only for > > QEMU/KVM. > > + This guest NUMA specification is currently available only for > > + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct > > + description of NUMA arranged sibling cell > > + distances Since 3.6.0. 
> > + > > + > > + Under NUMA h/w architecture, distinct resources such as memory > > + create a designated distance between cell and > > + siblings that now can be described with the help of > > + distances. A detailed description can be found within
[libvirt] [RFC PATCH v2 1/4] numa: describe siblings distances within cells
From: Wim ten Have Add libvirtd NUMA cell domain administration functionality to describe underlying cell id sibling distances in full fashion when configuring HVM guests. [below is an example of a 4 node setup] Changes under this commit concern all those that require adding the valid data structures, virDomainNuma* functional routines and the XML doc schema enhancements to enforce appropriate administration. These changes are accompanied with topic related documentation under docs/formatdomain.html within the "CPU model and topology" extending the "Guest NUMA topology" paragraph. For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory make them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. These changes alter the docs/schemas/cputypes.rng enforcing domain administration to follow the syntax below per numa cell id. These changes also alter docs/schemas/basictypes.rng to add "numaDistanceValue" which is an "unsignedInt" with a minimum value of 10 as 0-9 are reserved values and can not be used as System Locality Distance Information Table data. Signed-off-by: Wim ten Have --- docs/formatdomain.html.in | 64 ++- docs/schemas/basictypes.rng | 8 ++ docs/schemas/cputypes.rng | 18 +++ src/conf/cpu_conf.c | 2 +- src/conf/numa_conf.c| 260 +++- src/conf/numa_conf.h| 25 - src/libvirt_private.syms| 6 + 7 files changed, 376 insertions(+), 7 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 36bea67..b9736e3 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1510,7 +1510,69 @@ - This guest NUMA specification is currently available only for QEMU/KVM. + This guest NUMA specification is currently available only for + QEMU/KVM and Xen. Whereas Xen driver also allows for a distinct + description of NUMA arranged sibling cell + distances Since 3.6.0. + + + + Under NUMA h/w architecture, distinct resources such as memory + create a designated distance between cell and + siblings that now can be described with the help of + distances. A detailed describtion can be found within + the ACPI (Advanced Configuration and Power Interface Specification) + within the chapter explaining the system's SLIT (System Locality + Distance Information Table). + + + +... +<cpu> + ... 
+ <numa> +<cell id='0' cpus='0,4-7' memory='512000' unit='KiB'> + <distances> +<sibling id='0' value='10'/> +<sibling id='1' value='21'/> +<sibling id='2' value='31'/> +<sibling id='3' value='41'/> + </distances> +</cell> +<cell id='1' cpus='1,8-10,12-15' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='21'/> +<sibling id='1' value='10'/> +<sibling id='2' value='21'/> +<sibling id='3' value='31'/> + </distances> +</cell> +<cell id='2' cpus='2,11' memory='512000' unit='KiB' memAccess='shared'> + <distances> +<sibling id='0' value='31'/> +<sibling id='1' value='21'/> +<sibling id='2' value='10'/> +<sibling id='3' value='21'/> + </distances> +</cell> +<cell id='3' cpus='3' memory='51200
[libvirt] [RFC PATCH v2 2/4] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 21 for remote nodes/sockets." Signed-off-by: Wim ten Have --- src/libxl/libxl_conf.c | 124 + 1 file changed, 124 insertions(+) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 938e09d..7d9 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -617,6 +617,125 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList(virDomainDefPtr def, + libxl_ctx *ctx, + libxl_domain_config *d_config) +{ +int ret = -1; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +virBitmapPtr bitmap = NULL; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("libxl_get_physinfo_info failed")); +return -1; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Number of configured numa cells %zu exceeds the physical available nodes %zu"), + num_vnuma, nr_nodes); +return -1; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +return -1; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +bitmap = virDomainNumaGetNodeCpumask(numa, i); +if (bitmap == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) +goto cleanup; + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +/* + * Apply the configured distance value. If not + * available set 10 for local or 21 for remote nodes. + */ +for (j = 0; j < num_vnuma; j++) +if (!(p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j))) +p->distances[j] = (i == j) ? 
10 : 21; +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) { +for (i = 0; i < num_vnuma; i++) { +libxl_vnode_info *p = &vnuma_nodes[i]; + +VIR_FREE(p->distances); +} +VIR_FREE(vnuma_nodes); +} + +return ret; +} +#endif + static int libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) { @@ -2207,6 +2326,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) return -1; +#ifdef LIBXL_HAVE_VNUMA +if (libxlMakeVnumaList(def, ctx, d_config) < 0) +return -1; +#endif + if (libxlMakeDiskList(def, d_config) < 0) return -1; -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
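A minimal sketch of the case the commit message describes: a guest numa description without any distances given. With this patch libxl then fills in 10 for the local distance and 21 for every remote sibling. The cpu and memory values below are illustrative only:

    <cpu>
      <numa>
        <cell id='0' cpus='0-1' memory='2097152' unit='KiB'/>
        <cell id='1' cpus='2-3' memory='2097152' unit='KiB'/>
      </numa>
    </cpu>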
[libvirt] [RFC PATCH v2 0/4] numa: describe sibling nodes distances
From: Wim ten Have This patch extents guest domain administration adding support to advertise node sibling distances when configuring HVM numa guests. NUMA (non-uniform memory access), a method of configuring a cluster of nodes within a single multiprocessing system such that it shares processor local memory amongst others improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-node system, every socket is equipped with its own distinct memory. The whole typically resembles a SMP (symmetric multiprocessing) system being a "tightly-coupled," "share everything" system in which multiple processors are working under a single operating system and can access each others' memory over multiple "Bus Interconnect" paths. +-+-+-+ +-+-+-+ | M | CPU | CPU | | CPU | CPU | M | | E | | | | | | E | | M +- Socket0 -+ +- Socket3 -+ M | | O | | | | | | O | | R | CPU | CPU <-> CPU | CPU | R | | Y | | | | | | Y | +-+--^--+-+ +-+--^--+-+ | | | Bus Interconnect | | | +-+--v--+-+ +-+--v--+-+ | M | | | | | | M | | E | CPU | CPU <-> CPU | CPU | E | | M | | | | | | M | | O +- Socket1 -+ +- Socket2 -+ O | | R | | | | | | R | | Y | CPU | CPU | | CPU | CPU | Y | +-+-+-+ +-+-+-+ In contrast there is the limitation of a flat SMP system, not illustrated. Here, as sockets are added, the bus (data and address path), under high activity, gets overloaded and easily becomes a performance bottleneck. NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Bus Interconnect" to create gateways between the sockets (nodes) enabling inter-socket access to memory. These "Bus Interconnect" hops add data access delays when a CPU (core) accesses memory associated with a remote socket (node). For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory make them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. The XML changes made allow to describe the (or node/sockets) amongst node identifiers and propagate these towards the numa domain functionality finally adding support to libxl. [below is an example illustrating a 4 node/socket setup] By default on libxl, if no are given to describe the SLIT data between different s, this patch will default to a scheme using 10 for local and 21 for any remote node/socket, which is the assumption of guest OS when no SLIT is specified. While SLIT is optional, libxl requires that distances are set nonetheless. On Linux systems the SLIT detail can be listed with help of the 'numactl -H' command. 
An above HVM guest as described would on such prompt with below output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 4 5 6 7 node 0 size: 1988 MB node 0 free: 1743 MB node 1 cpus: 1 8 9 10 12 13 14 15 node 1 size: 1946 MB node 1 free: 1885 MB node 2 cpus: 2 11 node 2 siz
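The inline 4 node/socket example the cover letter refers to is not preserved here; the sketch below is reconstructed from the documentation example in patch 1/4 and is consistent with the cpu sets in the numactl -H listing above. The memory sizes are assumed at roughly 2 GiB per cell:

    <cpu>
      <numa>
        <cell id='0' cpus='0,4-7' memory='2097152' unit='KiB'>
          <distances>
            <sibling id='0' value='10'/>
            <sibling id='1' value='21'/>
            <sibling id='2' value='31'/>
            <sibling id='3' value='41'/>
          </distances>
        </cell>
        <cell id='1' cpus='1,8-10,12-15' memory='2097152' unit='KiB'>
          <distances>
            <sibling id='0' value='21'/>
            <sibling id='1' value='10'/>
            <sibling id='2' value='21'/>
            <sibling id='3' value='31'/>
          </distances>
        </cell>
        <cell id='2' cpus='2,11' memory='2097152' unit='KiB'>
          <distances>
            <sibling id='0' value='31'/>
            <sibling id='1' value='21'/>
            <sibling id='2' value='10'/>
            <sibling id='3' value='21'/>
          </distances>
        </cell>
        <cell id='3' cpus='3' memory='2097152' unit='KiB'>
          <distances>
            <sibling id='0' value='41'/>
            <sibling id='1' value='31'/>
            <sibling id='2' value='21'/>
            <sibling id='3' value='10'/>
          </distances>
        </cell>
      </numa>
    </cpu>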
[libvirt] [RFC PATCH v2 4/4] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for numa support administration as brought under this patch series. Signed-off-by: Wim ten Have --- .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ .../test-fullvirt-vnuma-nodistances.xml| 53 ++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 ++ tests/xlconfigtest.c | 4 ++ 5 files changed, 190 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg new file mode 100644 index 000..9871f21 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,21,21,21" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=21,10,21,21" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=21,21,10,21" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=21,21,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml new file mode 100644 index 000..9cab3ca --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml @@ -0,0 +1,53 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.cfg b/tests/xlconfigdata/test-fullvirt-vnuma.cfg new file mode 100644 index 000..91e233a --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,21,31,41" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=21,10,21,31" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=31,21,10,21" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=41,31,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.xml 
b/tests/xlconfigdata/test-fullvirt-vnuma.xml new file mode 100644 index 000..5368b0d --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.xml @@ -0,0 +1,81 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[libvirt] [RFC PATCH v2 3/4] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,41"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=41,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 21 for all remote instances. Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c | 303 + 1 file changed, 303 insertions(+) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index cac440c..d9f7028 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -309,6 +309,168 @@ xenParseXLSpice(virConfPtr conf, virDomainDefPtr def) return -1; } +#ifdef LIBXL_HAVE_VNUMA +static int +xenParseXLVnuma(virConfPtr conf, virDomainDefPtr def) +{ +int ret = -1; +char *tmp = NULL; +char **token = NULL; +size_t vcpus = 0; + +virConfValuePtr list; +virDomainNumaPtr numa; + +numa = def->numa; +if (numa == NULL) +return -1; + +list = virConfGetValue(conf, "vnuma"); +if (list && list->type == VIR_CONF_LIST) { +size_t nr_nodes = 0, vnodeCnt = 0; +virConfValuePtr vnode = list->list; +virCPUDefPtr cpu; + +vnode = list->list; +while (vnode) { +vnode = vnode->next; +nr_nodes++; +} + +if (!virDomainNumaSetNodeCount(numa, nr_nodes)) +goto cleanup; + +list = list->list; + +if (VIR_ALLOC(cpu) < 0) +goto cleanup; + +while (list) { + +/* Is there a sublist (vnode)? */ +if (list && list->type == VIR_CONF_LIST) { +vnode = list->list; + +while (vnode && vnode->type == VIR_CONF_STRING) { +const char *data; +const char *str = vnode->str; + +if (!str || + !(data = strrchr(str, '='))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode invalid format '%s'"), + str); +goto skipvnode; +} +data++; + +if (*data) { +size_t len; +char vtoken[64]; + +if (STRPREFIX(str, "pnode")) { +unsigned int cellid; + +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu pnode '%s' too long for destination"), + vnodeCnt, data); +goto cleanup; +} + +if ((virStrToLong_ui(vtoken, NULL, 10, &cellid) < 0) || +(cellid >= nr_nodes)) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu invalid value pnode '%s'"), + vnodeCnt, data); +goto cleanup; +} +} else if (STRPREFIX(str, "size")) { +unsigned long long kbsize; + +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu size '%s' too long for destination"), + vnodeCnt, data); +
Re: [libvirt] [RFC PATCH v1 1/4] numa: describe siblings distances within cells
On Wed, 21 Jun 2017 21:08:30 +0200 Wim ten Have wrote: > On Mon, 19 Jun 2017 12:26:20 -0600 > Jim Fehlig wrote: > > > On 06/12/2017 12:54 PM, Wim Ten Have wrote: > > > From: Wim ten Have > > > > > > Add libvirtd NUMA cell domain administration functionality to > > > describe underlying cell id sibling distances in full fashion > > > when configuring HVM guests. > > > > Thanks for the detailed cover letter :-). Some of that information (e.g. > > ACPI > > spec, SLIT table, qemu support) would be useful in this commit message. > > I'll follow up on that under v2. > > > > [below is an example of a 4 node setup] > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Others on the list are better XML designers, but the following seems to > > achieve > > the same and is a bit more compact > > > > > > > > > > > > > > > > > > CC danpb, who often has good ideas on schema design. > > Will give Daniel and (others) time prior submitting v2. Otherwise > follow your suggestion above for its compact and clear annotation. One thing on specific, forgot to mention previous reply, is that the selected syntax was derived from way libvirt describes host capabilities introduced under; commit 8ba0a58f8d9db9eeed003b889dfcd3451d10fbca Author: Michal Privoznik Date: Tue Jun 3 15:18:27 2014 +0200 "virCaps: Expose distance between host NUMA nodes" Regards, - Wim. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Changes under this commit concern all those that require adding > > > the valid data structures, virDomainNuma* functional routines and the > > > XML doc schema enhancements to enforce appropriate administration. > > > > One item you forgot was docs/formatdomain.html.in. Changes in schema should > > always be described by accompanying changes in documentation. > > Oops ... noted. I'll go slow on coming back with v2 giving you and > other time to further comment. Thanks for reviewing! > > - Wim. > > > Regards, > > Jim > > > > > > > > These changes alter the docs/schemas/cputypes.rng enforcing > > > domain administration to follow the syntax below per numa cell id. > > > > > > These changes also alter docs/schemas/basictypes.rng to add > > > "numaDistanceValue" which is an "unsignedInt" with a minimum value > > > of 10 as 0-9 are reserved values and can not be used as System > > > Locality Distance Information Table data. > > > > > > Signed-off-by: Wim ten Have > > > --- > > > docs/schemas/basictypes.rng | 8 ++ > > > docs/schemas/cputypes.rng | 18 +++ > > > src/conf/cpu_conf.c | 2 +- > > > src/conf/numa_conf.c| 260 > > > +++- > > > src/conf/numa_conf.h| 25 - > > > src/libvirt_private.syms| 6 + > > > 6 files changed, 313 insertions(+), 6 deletions(-) > > > > > > diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng > > > index 1ea667c..a335b5d 100644 > > > --- a/docs/schemas/basictypes.rng > > > +++ b/docs/schemas/basictypes.rng > > > @@ -77,6 +77,14 @@ > > > > > > > > > > > > + > > > + > > > + > > > +10 > > > + > > > + > > > + > > > + > > > > > > > > > > > > diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng > > > index 3eef16a..c45b6df 100644 > > > --- a/docs/schemas/cputypes.rng > > > +++ b/docs/schemas/cputypes.rng > > > @@ -12
Re: [libvirt] [RFC PATCH v1 4/4] xlconfigtest: add tests for numa cell sibling distances
On Thu, 22 Jun 2017 16:16:16 +0100 Joao Martins wrote: > On 06/12/2017 07:54 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Test a bidirectional xen-xl domxml to and from native for numa > > support administration as brought under this patch series. > > > > Signed-off-by: Wim ten Have > > --- > > ... > > diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml > > b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml > > new file mode 100644 > > index 000..a576881 > > --- /dev/null > > +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml > > @@ -0,0 +1,54 @@ > > + > > + XenGuest2 > > + c7a5fdb2-cdaf-9455-926a-d65c16db1809 > > + 8388608 > > + 8388608 > > + 8 > > + > > +hvm > > +/usr/lib/xen/boot/hvmloader > > + > > + > > + > > + > > + > > + > > + > > + > > + > > We don't set/support topology info then it shouldn't be in the xml. Therefore > the test with nodistances will fail right? In that case should be > removed then. Right ... that specific line should not be there. It sneaked in because I was playing with code supporting topology and unfortunately forgot to delete it from the test. The reason I forgot is that the line does _NOT_ cause any issue in testing, because for 'fullvirt-vnuma-nodistances' the CANONs can only go one way (domxml to native). > > diff --git a/tests/xlconfigtest.c b/tests/xlconfigtest.c > > +DO_TEST_PARSE("fullvirt-vnuma-nodistances", false); Going backwards (domxml from native) would create XML listing the default distances that get generated in the absence of any, making the comparison against the -nodistances CANON xml representation fail. > Albeit the other test doesn't have element which is good :) Indeed ... it was not supposed to be in. Thanks for spotting this one. I'll remove it under v2. Rgds, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [RFC PATCH v1 1/4] numa: describe siblings distances within cells
On Mon, 19 Jun 2017 12:26:20 -0600 Jim Fehlig wrote: > On 06/12/2017 12:54 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Add libvirtd NUMA cell domain administration functionality to > > describe underlying cell id sibling distances in full fashion > > when configuring HVM guests. > > Thanks for the detailed cover letter :-). Some of that information (e.g. ACPI > spec, SLIT table, qemu support) would be useful in this commit message. I'll follow up on that under v2. > > [below is an example of a 4 node setup] > > > > > > > > > > > > > > > > > > > > > > > > Others on the list are better XML designers, but the following seems to > achieve > the same and is a bit more compact > > > > > > > > > CC danpb, who often has good ideas on schema design. Will give Daniel and (others) time prior submitting v2. Otherwise follow your suggestion above for its compact and clear annotation. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Changes under this commit concern all those that require adding > > the valid data structures, virDomainNuma* functional routines and the > > XML doc schema enhancements to enforce appropriate administration. > > One item you forgot was docs/formatdomain.html.in. Changes in schema should > always be described by accompanying changes in documentation. Oops ... noted. I'll go slow on coming back with v2 giving you and other time to further comment. Thanks for reviewing! - Wim. > Regards, > Jim > > > > > These changes alter the docs/schemas/cputypes.rng enforcing > > domain administration to follow the syntax below per numa cell id. > > > > These changes also alter docs/schemas/basictypes.rng to add > > "numaDistanceValue" which is an "unsignedInt" with a minimum value > > of 10 as 0-9 are reserved values and can not be used as System > > Locality Distance Information Table data. > > > > Signed-off-by: Wim ten Have > > --- > > docs/schemas/basictypes.rng | 8 ++ > > docs/schemas/cputypes.rng | 18 +++ > > src/conf/cpu_conf.c | 2 +- > > src/conf/numa_conf.c| 260 > > +++- > > src/conf/numa_conf.h| 25 - > > src/libvirt_private.syms| 6 + > > 6 files changed, 313 insertions(+), 6 deletions(-) > > > > diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng > > index 1ea667c..a335b5d 100644 > > --- a/docs/schemas/basictypes.rng > > +++ b/docs/schemas/basictypes.rng > > @@ -77,6 +77,14 @@ > > > > > > > > + > > + > > + > > +10 > > + > > + > > + > > + > > > > > > > > diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng > > index 3eef16a..c45b6df 100644 > > --- a/docs/schemas/cputypes.rng > > +++ b/docs/schemas/cputypes.rng > > @@ -129,6 +129,24 @@ > > > > > > > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > > > > > > > diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c > > index da40e9b..5d8f7be3 100644 > > --- a/src/conf/cpu_conf.c > > +++ b/src/conf/cpu_conf.c > > @@ -643,7 +643,7 @@ virCPUDefFormatBufFull(virBufferPtr buf, > > if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0) > > goto cleanup; > > > > -if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0) > > +if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0) > > goto cleanup; > > > > /* Put it all together */ > > diff --git a/src/conf/n
[libvirt] [RFC PATCH v1 2/4] libxl: vnuma support
From: Wim ten Have This patch generates a NUMA distance-aware libxl description from the information extracted from a NUMA distance-aware libvirt XML file. By default, if no NUMA node distance information is supplied in the libvirt XML file, this patch uses the distances 10 for local and 21 for remote nodes/sockets." Signed-off-by: Wim ten Have --- src/libxl/libxl_conf.c | 128 + 1 file changed, 128 insertions(+) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 04d9dd1..3e1a759 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -617,6 +617,129 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, return 0; } +#ifdef LIBXL_HAVE_VNUMA +static int +libxlMakeVnumaList( +virDomainDefPtr def, +libxl_ctx *ctx, +libxl_domain_config *d_config) +{ +int ret = -1; +char *vcpus = NULL; +size_t i, j; +size_t nr_nodes; +size_t num_vnuma; +virDomainNumaPtr numa = def->numa; +libxl_domain_build_info *b_info = &d_config->b_info; +libxl_physinfo physinfo; +libxl_vnode_info *vnuma_nodes = NULL; + +if (!numa) +return 0; + +num_vnuma = virDomainNumaGetNodeCount(numa); +if (!num_vnuma) +return 0; + +libxl_physinfo_init(&physinfo); +if (libxl_get_physinfo(ctx, &physinfo) < 0) { +libxl_physinfo_dispose(&physinfo); +VIR_WARN("libxl_get_physinfo failed"); +goto cleanup; +} +nr_nodes = physinfo.nr_nodes; +libxl_physinfo_dispose(&physinfo); + +if (num_vnuma > nr_nodes) { +virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Number of configured numa cells %zu exceeds the physical available nodes %zu"), + num_vnuma, nr_nodes); +goto cleanup; +} + +/* + * allocate the vnuma_nodes for assignment under b_info. + */ +if (VIR_ALLOC_N(vnuma_nodes, num_vnuma) < 0) +goto cleanup; + +/* + * parse the vnuma vnodes data. + */ +for (i = 0; i < num_vnuma; i++) { +int cpu; +virBitmapPtr bitmap = NULL; +libxl_bitmap vcpu_bitmap; +libxl_vnode_info *p = &vnuma_nodes[i]; + +libxl_vnode_info_init(p); + +/* pnode */ +p->pnode = i; + +/* memory size */ +p->memkb = virDomainNumaGetNodeMemorySize(numa, i); + +/* vcpus */ +vcpus = virBitmapFormat(virDomainNumaGetNodeCpumask(numa, i)); +if (vcpus == NULL) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma sibling %zu missing vcpus set"), i); +goto cleanup; +} + +if (virBitmapParse(vcpus, &bitmap, VIR_DOMAIN_CPUMASK_LEN) < 0) +goto cleanup; + +if ((cpu = virBitmapNextSetBit(bitmap, -1)) < 0) { +VIR_FREE(bitmap); +goto cleanup; +} + +libxl_bitmap_init(&vcpu_bitmap); +if (libxl_cpu_bitmap_alloc(ctx, &vcpu_bitmap, b_info->max_vcpus)) { +virReportOOMError(); +goto cleanup; +} + +do { +libxl_bitmap_set(&vcpu_bitmap, cpu); +} while ((cpu = virBitmapNextSetBit(bitmap, cpu)) >= 0); + +libxl_bitmap_copy_alloc(ctx, &p->vcpus, &vcpu_bitmap); +libxl_bitmap_dispose(&vcpu_bitmap); + +VIR_FREE(bitmap); +VIR_FREE(vcpus); + +/* vdistances */ +if (VIR_ALLOC_N(p->distances, num_vnuma) < 0) +goto cleanup; +p->num_distances = num_vnuma; + +/* + * Apply the configured distance value. If not + * available set 10 for local or 21 for remote nodes. + */ +for (j = 0; j < num_vnuma; j++) +p->distances[j] = virDomainNumaGetNodeDistance(numa, i, j) ?: +(i == j ? 
10 : 21); +} + +b_info->vnuma_nodes = vnuma_nodes; +b_info->num_vnuma_nodes = num_vnuma; + +ret = 0; + + cleanup: +if (ret) +VIR_FREE(vnuma_nodes); +VIR_FREE(vcpus); + +return ret; +} +#endif + static int libxlDiskSetDiscard(libxl_device_disk *x_disk, int discard) { @@ -2207,6 +2330,11 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) return -1; +#ifdef LIBXL_HAVE_VNUMA +if (libxlMakeVnumaList(def, ctx, d_config) < 0) +return -1; +#endif + if (libxlMakeDiskList(def, d_config) < 0) return -1; -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [RFC PATCH v1 0/4] numa: describe sibling nodes distances
From: Wim ten Have This patch extents guest domain administration adding support to advertise node sibling distances when configuring HVM numa guests. NUMA (non-uniform memory access), a method of configuring a cluster of nodes within a single multiprocessing system such that it shares processor local memory amongst others improving performance and the ability of the system to be expanded. A NUMA system could be illustrated as shown below. Within this 4-node system, every socket is equipped with its own distinct memory. The whole typically resembles a SMP (symmetric multiprocessing) system being a "tightly-coupled," "share everything" system in which multiple processors are working under a single operating system and can access each others' memory over multiple "Bus Interconnect" paths. +-+-+-+ +-+-+-+ | M | CPU | CPU | | CPU | CPU | M | | E | | | | | | E | | M +- Socket0 -+ +- Socket3 -+ M | | O | | | | | | O | | R | CPU | CPU <-> CPU | CPU | R | | Y | | | | | | Y | +-+--^--+-+ +-+--^--+-+ | | | Bus Interconnect | | | +-+--v--+-+ +-+--v--+-+ | M | | | | | | M | | E | CPU | CPU <-> CPU | CPU | E | | M | | | | | | M | | O +- Socket1 -+ +- Socket2 -+ O | | R | | | | | | R | | Y | CPU | CPU | | CPU | CPU | Y | +-+-+-+ +-+-+-+ In contrast there is the limitation of a flat SMP system, not illustrated. Here, as sockets are added, the bus (data and address path), under high activity, gets overloaded and easily becomes a performance bottleneck. NUMA adds an intermediate level of memory shared amongst a few cores per socket as illustrated above, so that data accesses do not have to travel over a single bus. Unfortunately the way NUMA does this adds its own limitations. This, as visualized in the illustration above, happens when data is stored in memory associated with Socket2 and is accessed by a CPU (core) in Socket0. The processors use the "Bus Interconnect" to create gateways between the sockets (nodes) enabling inter-socket access to memory. These "Bus Interconnect" hops add data access delays when a CPU (core) accesses memory associated with a remote socket (node). For terminology we refer to sockets as "nodes" where access to each others' distinct resources such as memory make them "siblings" with a designated "distance" between them. A specific design is described under the ACPI (Advanced Configuration and Power Interface Specification) within the chapter explaining the system's SLIT (System Locality Distance Information Table). These patches extend core libvirt's XML description of a virtual machine's hardware to include NUMA distance information for sibling nodes, which is then passed to Xen guests via libxl. Recently qemu landed support for constructing the SLIT since commit 0f203430dd ("numa: Allow setting NUMA distance for different NUMA nodes"), hence these core libvirt extensions can also help other drivers in supporting this feature. The XML changes made allow to describe the (or node/sockets) amongst node identifiers and propagate these towards the numa domain functionality finally adding support to libxl. [below is an example illustrating a 4 node/socket setup] By default on libxl, if no are given to describe the SLIT data between different s, this patch will default to a scheme using 10 for local and 21 for any remote node/socket, which is the assumption of guest OS when no SLIT is specified. While SLIT is optional, libxl requires that distances are set nonetheless. On Linux systems the SLIT detail can be listed with help of the 'numactl -H' command. 
An above HVM guest as described would on such prompt with below output. [root@f25 ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 4 5 6 7 node 0 size: 1988 MB node 0 free: 1743 MB node 1 cpus: 1 8 9 10 12 13 14 15 node 1 size: 1946 MB node 1 free: 1885 MB node 2 cpus: 2 11 node 2 siz
[libvirt] [RFC PATCH v1 3/4] xenconfig: add domxml conversions for xen-xl
From: Wim ten Have This patch converts NUMA configurations between the Xen libxl configuration file format and libvirt's XML format. XML HVM domain configuration: Xen xl.cfg domain configuration: vnuma = [["pnode=0","size=2048","vcpus=0-1","vdistances=10,21,31,41"], ["pnode=1","size=2048","vcpus=2-3","vdistances=21,10,21,31"], ["pnode=2","size=2048","vcpus=4-5","vdistances=31,21,10,21"], ["pnode=3","size=2048","vcpus=6-7","vdistances=41,31,21,10"]] If there is no XML description amongst the data the conversion schema from xml to native will generate 10 for local and 21 for all remote instances. Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c | 300 + 1 file changed, 300 insertions(+) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index cac440c..0c7dd23 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -309,6 +309,168 @@ xenParseXLSpice(virConfPtr conf, virDomainDefPtr def) return -1; } +#ifdef LIBXL_HAVE_VNUMA +static int +xenParseXLVnuma(virConfPtr conf, virDomainDefPtr def) +{ +int ret = -1; +char *tmp = NULL; +char **token = NULL; +size_t vcpus = 0; + +virConfValuePtr list; +virDomainNumaPtr numa; + +numa = def->numa; +if (numa == NULL) +return -1; + +list = virConfGetValue(conf, "vnuma"); +if (list && list->type == VIR_CONF_LIST) { +size_t nr_nodes = 0, vnodeCnt = 0; +virConfValuePtr vnode = list->list; +virCPUDefPtr cpu; + +vnode = list->list; +while (vnode) { +vnode = vnode->next; +nr_nodes++; +} + +if (!virDomainNumaSetNodeCount(numa, nr_nodes)) +goto cleanup; + +list = list->list; + +if (VIR_ALLOC(cpu) < 0) +goto cleanup; + +while (list) { + +/* Is there a sublist (vnode)? */ +if (list && list->type == VIR_CONF_LIST) { +vnode = list->list; + +while (vnode && vnode->type == VIR_CONF_STRING) { +const char *data; +const char *str = vnode->str; + +if (!str || + !(data = strrchr(str, '='))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode invalid format '%s'"), + str); +goto skipvnode; +} +data++; + +if (*data) { +size_t len; +char vtoken[64]; + +if (STRPREFIX(str, "pnode")) { +unsigned int cellid; + +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu pnode '%s' too long for destination"), + vnodeCnt, data); +goto cleanup; +} + +if ((virStrToLong_ui(vtoken, NULL, 10, &cellid) < 0) || +(cellid >= nr_nodes)) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu invalid value pnode '%s'"), + vnodeCnt, data); +goto cleanup; +} +} else if (STRPREFIX(str, "size")) { +unsigned long long kbsize; + +len = strlen(data); +if (!virStrncpy(vtoken, data, +len, sizeof(vtoken))) { +virReportError(VIR_ERR_INTERNAL_ERROR, + _("vnuma vnode %zu size '%s' too long for destination"), + vnodeCnt, data); +
[libvirt] [RFC PATCH v1 1/4] numa: describe siblings distances within cells
From: Wim ten Have Add libvirtd NUMA cell domain administration functionality to describe underlying cell id sibling distances in full fashion when configuring HVM guests. [below is an example of a 4 node setup] Changes under this commit concern all those that require adding the valid data structures, virDomainNuma* functional routines and the XML doc schema enhancements to enforce appropriate administration. These changes alter the docs/schemas/cputypes.rng enforcing domain administration to follow the syntax below per numa cell id. These changes also alter docs/schemas/basictypes.rng to add "numaDistanceValue" which is an "unsignedInt" with a minimum value of 10 as 0-9 are reserved values and can not be used as System Locality Distance Information Table data. Signed-off-by: Wim ten Have --- docs/schemas/basictypes.rng | 8 ++ docs/schemas/cputypes.rng | 18 +++ src/conf/cpu_conf.c | 2 +- src/conf/numa_conf.c| 260 +++- src/conf/numa_conf.h| 25 - src/libvirt_private.syms| 6 + 6 files changed, 313 insertions(+), 6 deletions(-) diff --git a/docs/schemas/basictypes.rng b/docs/schemas/basictypes.rng index 1ea667c..a335b5d 100644 --- a/docs/schemas/basictypes.rng +++ b/docs/schemas/basictypes.rng @@ -77,6 +77,14 @@ + + + +10 + + + + diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng index 3eef16a..c45b6df 100644 --- a/docs/schemas/cputypes.rng +++ b/docs/schemas/cputypes.rng @@ -129,6 +129,24 @@ + + + + + + + + + + + + + + + + + + diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c index da40e9b..5d8f7be3 100644 --- a/src/conf/cpu_conf.c +++ b/src/conf/cpu_conf.c @@ -643,7 +643,7 @@ virCPUDefFormatBufFull(virBufferPtr buf, if (virCPUDefFormatBuf(&childrenBuf, def, updateCPU) < 0) goto cleanup; -if (virDomainNumaDefCPUFormat(&childrenBuf, numa) < 0) +if (virDomainNumaDefCPUFormatXML(&childrenBuf, numa) < 0) goto cleanup; /* Put it all together */ diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index bfd3703..1914810 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -48,6 +48,8 @@ VIR_ENUM_IMPL(virDomainMemoryAccess, VIR_DOMAIN_MEMORY_ACCESS_LAST, "shared", "private") +typedef struct _virDomainNumaDistance virDomainNumaDistance; +typedef virDomainNumaDistance *virDomainNumaDistancePtr; typedef struct _virDomainNumaNode virDomainNumaNode; typedef virDomainNumaNode *virDomainNumaNodePtr; @@ -66,6 +68,12 @@ struct _virDomainNuma { virBitmapPtr nodeset; /* host memory nodes where this guest node resides */ virDomainNumatuneMemMode mode; /* memory mode selection */ virDomainMemoryAccess memAccess; /* shared memory access configuration */ + +struct _virDomainNumaDistance { + unsigned int value;/* locality value for node i*j */ + unsigned int cellid; +} *distances; /* remote node distances */ +size_t ndistances; } *mem_nodes; /* guest node configuration */ size_t nmem_nodes; @@ -686,6 +694,95 @@ virDomainNumatuneNodesetIsAvailable(virDomainNumaPtr numatune, } +static int +virDomainNumaDefNodeDistanceParseXML(virDomainNumaPtr def, + xmlXPathContextPtr ctxt, + unsigned int cur_cell) +{ +int ret = -1; +char *tmp = NULL; +size_t i; +xmlNodePtr *nodes = NULL; +int ndistances; +virDomainNumaDistancePtr distances = NULL; + + +if (!virXPathNode("./distances[1]/sibling", ctxt)) +return 0; + +if ((ndistances = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) { +virReportError(VIR_ERR_XML_ERROR, "%s", + _("NUMA distances defined without siblings")); +goto cleanup; +} + +if (ndistances < def->nmem_nodes) { +virReportError(VIR_ERR_XML_ERROR, + _("NUMA distances 
defined with fewer siblings than nodes for cell id: '%d'"), + cur_cell); +goto cleanup; +} + +if (VIR_ALLOC_N(distances, ndistances) < 0) +goto cleanup; + +for (i = 0; i < ndistances; i++) { +unsigned int sibling_id = i
[libvirt] [RFC PATCH v1 4/4] xlconfigtest: add tests for numa cell sibling distances
From: Wim ten Have Test a bidirectional xen-xl domxml to and from native for numa support administration as brought under this patch series. Signed-off-by: Wim ten Have --- .../test-fullvirt-vnuma-nodistances.cfg| 26 +++ .../test-fullvirt-vnuma-nodistances.xml| 54 +++ tests/xlconfigdata/test-fullvirt-vnuma.cfg | 26 +++ tests/xlconfigdata/test-fullvirt-vnuma.xml | 81 ++ tests/xlconfigtest.c | 4 ++ 5 files changed, 191 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-vnuma.xml diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg new file mode 100644 index 000..9871f21 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,21,21,21" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=21,10,21,21" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=21,21,10,21" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=21,21,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml new file mode 100644 index 000..a576881 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma-nodistances.xml @@ -0,0 +1,54 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.cfg b/tests/xlconfigdata/test-fullvirt-vnuma.cfg new file mode 100644 index 000..91e233a --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 8192 +memory = 8192 +vcpus = 8 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +vnuma = [ [ "pnode=0", "size=2048", "vcpus=0-1", "vdistances=10,21,31,41" ], [ "pnode=1", "size=2048", "vcpus=2-3", "vdistances=21,10,21,31" ], [ "pnode=2", "size=2048", "vcpus=4-5", "vdistances=31,21,10,21" ], [ "pnode=3", "size=2048", "vcpus=6-7", "vdistances=41,31,21,10" ] ] +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2" ] diff --git a/tests/xlconfigdata/test-fullvirt-vnuma.xml 
b/tests/xlconfigdata/test-fullvirt-vnuma.xml new file mode 100644 index 000..5368b0d --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-vnuma.xml @@ -0,0 +1,81 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 8388608 + 8388608 + 8 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[libvirt] [PATCH v2] xenconfig: fix handling of NULL disk source
From: Wim ten Have It is possible to crash libvirtd when converting xl native config to domXML when the xl config contains an empty disk source, e.g. an empty CDROM. Fix by checking that the disk source is non-NULL before parsing it. Wim ten Have (1): xenconfig: fix handling of NULL disk source src/xenconfig/xen_xl.c| 4 tests/xlconfigdata/test-disk-positional-parms-partial.cfg | 2 +- tests/xlconfigdata/test-disk-positional-parms-partial.xml | 6 ++ 3 files changed, 11 insertions(+), 1 deletion(-) -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v2] xenconfig: fix handling of NULL disk source
From: Wim ten Have It is possible to crash libvirtd when converting xl native config to domXML when the xl config contains an empty disk source, e.g. an empty CDROM. Fix by checking that the disk source is non-NULL before parsing it. Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c| 4 tests/xlconfigdata/test-disk-positional-parms-partial.cfg | 2 +- tests/xlconfigdata/test-disk-positional-parms-partial.xml | 6 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index 4f24d45..cac440c 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -316,6 +316,10 @@ xenParseXLDiskSrc(virDomainDiskDefPtr disk, char *srcstr) char *tmpstr = NULL; int ret = -1; +/* A NULL source is valid, e.g. an empty CDROM */ +if (srcstr == NULL) +return 0; + if (STRPREFIX(srcstr, "rbd:")) { if (!(tmpstr = virStringReplace(srcstr, "", "\\"))) goto cleanup; diff --git a/tests/xlconfigdata/test-disk-positional-parms-partial.cfg b/tests/xlconfigdata/test-disk-positional-parms-partial.cfg index fd16db0..940304e 100644 --- a/tests/xlconfigdata/test-disk-positional-parms-partial.cfg +++ b/tests/xlconfigdata/test-disk-positional-parms-partial.cfg @@ -22,4 +22,4 @@ parallel = "none" serial = "none" builder = "hvm" boot = "d" -disk = [ "/dev/HostVG/XenGuest2,,hda,,backendtype=phy", "/var/lib/libvirt/images/XenGuest2-home,,hdb,,", "/root/boot.iso,,hdc,,devtype=cdrom" ] +disk = [ "/dev/HostVG/XenGuest2,,hda,,backendtype=phy", "/var/lib/libvirt/images/XenGuest2-home,,hdb,,", "/root/boot.iso,,hdc,,devtype=cdrom" , "format=raw,vdev=hdd,access=ro,devtype=cdrom" ] diff --git a/tests/xlconfigdata/test-disk-positional-parms-partial.xml b/tests/xlconfigdata/test-disk-positional-parms-partial.xml index e86a5be..52b21dc 100644 --- a/tests/xlconfigdata/test-disk-positional-parms-partial.xml +++ b/tests/xlconfigdata/test-disk-positional-parms-partial.xml @@ -39,6 +39,12 @@ + + + + + + -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
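The new hunk in test-disk-positional-parms-partial.xml adds the domXML counterpart of the extra "format=raw,vdev=hdd,access=ro,devtype=cdrom" entry, though the XML lines themselves are not visible above. A sketch of what an empty-source CDROM typically looks like in libvirt domXML (the driver name and bus are assumptions here):

    <disk type='file' device='cdrom'>
      <driver name='qemu' type='raw'/>
      <target dev='hdd' bus='ide'/>
      <readonly/>
    </disk>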
Re: [libvirt] [PATCH v1] xenParseXLDiskSrc: protect against a NULL pointer reference
On Fri, 19 May 2017 08:12:08 -0600 Jim Fehlig wrote: > On 05/19/2017 06:38 AM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Working larger code changes whilst testing functionality and domxml > > conversion methodology for xen-xl (xenconfig) a cumbersome caveat surfaced > > that potentially can take libvirtd out with a SEGV when parsing complex > > disk xl.cfg directives. > > > > This patch also includes tests/xlconfigdata adjustments to illustrate > > specific disk xl.cfg directive and that way updating test 2-ways. > > > > Running the tests with defensive code fix all will run without trouble, > > running it without the code fix testing will trap with below listed > > debug transcript. > > > > VIR_TEST_DEBUG=1 ./run gdb ./tests/xlconfigtest > > TEST: xlconfigtest > > 1) Xen XL-2-XML Parse fullvirt-ovmf ... OK > > 2) Xen XL-2-XML Format fullvirt-ovmf ... OK > > 3) Xen XL-2-XML Parse paravirt-maxvcpus ... OK > > 4) Xen XL-2-XML Format paravirt-maxvcpus ... OK > > 5) Xen XL-2-XML Parse new-disk... OK > > 6) Xen XL-2-XML Format new-disk... OK > > 7) Xen XL-2-XML Format disk-positional-parms-full ... OK > > 8) Xen XL-2-XML Format disk-positional-parms-partial ... Program > > received signal SIGSEGV, Segmentation fault. > > > > (gdb) where > > xlcfg=0x66d2b0 > > "/home/wtenhave/WORK/libvirt/XOSS/BUGS/libvirt/tests/xlconfigdata/test-disk-positional-parms-partial.cfg", > > xml=0x66d320 > > "/home/wtenhave/WORK/libvirt/XOSS/BUGS/libvirt/tests/xlconfigdata/test-disk-positional-parms-partial.xml", > > replaceVars=false) at xlconfigtest.c:152 > > body=0x40f32d , data=0x7fffd990) at > > testutils.c:180 > > > > (gdb) frame 1 > > 319 if (STRPREFIX(srcstr, "rbd:")) { > > > > (gdb) print srcstr > > $1 = 0x0 > > I'm always hesitant to critique verbose commit messages, but in this case I > think the real fix gets lost in the verbosity :-). critique taken O:-) > IMO something along the lines of the following would suffice: Exactly what is coming up. > xenconfig: fix handling of NULL disk source > > It is possible to crash libvirtd when converting xl native config to domXML > when > the xl config contains an empty disk source, e.g. an empty CDROM. Fix by > checking that the disk source is non-NULL before parsing it. > > > > > Signed-off-by: Wim ten Have > > --- > > src/xenconfig/xen_xl.c| 3 ++- > > tests/xlconfigdata/test-disk-positional-parms-partial.cfg | 2 +- > > tests/xlconfigdata/test-disk-positional-parms-partial.xml | 6 ++ > > 3 files changed, 9 insertions(+), 2 deletions(-) > > > > diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c > > index 4f24d45..958956a 100644 > > --- a/src/xenconfig/xen_xl.c > > +++ b/src/xenconfig/xen_xl.c > > @@ -316,7 +316,8 @@ xenParseXLDiskSrc(virDomainDiskDefPtr disk, char > > *srcstr) > > char *tmpstr = NULL; > > int ret = -1; > > > > -if (STRPREFIX(srcstr, "rbd:")) { > > +if (srcstr && > > +STRPREFIX(srcstr, "rbd:")) { > > How about checking for a NULL source on entry to this function? Something > like > the below diff? > > Looks good otherwise! > > Regards, > Jim > > diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c > index 4f24d457c..cac440cd4 100644 > --- a/src/xenconfig/xen_xl.c > +++ b/src/xenconfig/xen_xl.c > @@ -316,6 +316,10 @@ xenParseXLDiskSrc(virDomainDiskDefPtr disk, char *srcstr) > char *tmpstr = NULL; > int ret = -1; > > +/* A NULL source is valid, e.g. 
an empty CDROM */ > +if (srcstr == NULL) > +return 0; > + > if (STRPREFIX(srcstr, "rbd:")) { > if (!(tmpstr = virStringReplace(srcstr, "", "\\"))) > goto cleanup; > -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1] xenParseXLDiskSrc: protect against a NULL pointer reference
From: Wim ten Have This patch protects against a NULL pointer dereference leading to a SEGV and that way taking out libvirtd while performing domain conversion requests per domxml-from-native xen-xl xl.cfg within xenParseXLDiskSrc() when partial disk parameters are provided. Wim ten Have (1): xenParseXLDiskSrc: protect against a NULL pointer reference src/xenconfig/xen_xl.c| 3 ++- tests/xlconfigdata/test-disk-positional-parms-partial.cfg | 2 +- tests/xlconfigdata/test-disk-positional-parms-partial.xml | 6 ++ 3 files changed, 9 insertions(+), 2 deletions(-) -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1] xenParseXLDiskSrc: protect against a NULL pointer reference
From: Wim ten Have Working larger code changes whilst testing functionality and domxml conversion methodology for xen-xl (xenconfig) a cumbersome caveat surfaced that potentially can take libvirtd out with a SEGV when parsing complex disk xl.cfg directives. This patch also includes tests/xlconfigdata adjustments to illustrate specific disk xl.cfg directive and that way updating test 2-ways. Running the tests with defensive code fix all will run without trouble, running it without the code fix testing will trap with below listed debug transcript. VIR_TEST_DEBUG=1 ./run gdb ./tests/xlconfigtest TEST: xlconfigtest 1) Xen XL-2-XML Parse fullvirt-ovmf ... OK 2) Xen XL-2-XML Format fullvirt-ovmf ... OK 3) Xen XL-2-XML Parse paravirt-maxvcpus ... OK 4) Xen XL-2-XML Format paravirt-maxvcpus ... OK 5) Xen XL-2-XML Parse new-disk... OK 6) Xen XL-2-XML Format new-disk... OK 7) Xen XL-2-XML Format disk-positional-parms-full ... OK 8) Xen XL-2-XML Format disk-positional-parms-partial ... Program received signal SIGSEGV, Segmentation fault. (gdb) where xlcfg=0x66d2b0 "/home/wtenhave/WORK/libvirt/XOSS/BUGS/libvirt/tests/xlconfigdata/test-disk-positional-parms-partial.cfg", xml=0x66d320 "/home/wtenhave/WORK/libvirt/XOSS/BUGS/libvirt/tests/xlconfigdata/test-disk-positional-parms-partial.xml", replaceVars=false) at xlconfigtest.c:152 body=0x40f32d , data=0x7fffd990) at testutils.c:180 (gdb) frame 1 319 if (STRPREFIX(srcstr, "rbd:")) { (gdb) print srcstr $1 = 0x0 Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c| 3 ++- tests/xlconfigdata/test-disk-positional-parms-partial.cfg | 2 +- tests/xlconfigdata/test-disk-positional-parms-partial.xml | 6 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index 4f24d45..958956a 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -316,7 +316,8 @@ xenParseXLDiskSrc(virDomainDiskDefPtr disk, char *srcstr) char *tmpstr = NULL; int ret = -1; -if (STRPREFIX(srcstr, "rbd:")) { +if (srcstr && +STRPREFIX(srcstr, "rbd:")) { if (!(tmpstr = virStringReplace(srcstr, "", "\\"))) goto cleanup; diff --git a/tests/xlconfigdata/test-disk-positional-parms-partial.cfg b/tests/xlconfigdata/test-disk-positional-parms-partial.cfg index fd16db0..940304e 100644 --- a/tests/xlconfigdata/test-disk-positional-parms-partial.cfg +++ b/tests/xlconfigdata/test-disk-positional-parms-partial.cfg @@ -22,4 +22,4 @@ parallel = "none" serial = "none" builder = "hvm" boot = "d" -disk = [ "/dev/HostVG/XenGuest2,,hda,,backendtype=phy", "/var/lib/libvirt/images/XenGuest2-home,,hdb,,", "/root/boot.iso,,hdc,,devtype=cdrom" ] +disk = [ "/dev/HostVG/XenGuest2,,hda,,backendtype=phy", "/var/lib/libvirt/images/XenGuest2-home,,hdb,,", "/root/boot.iso,,hdc,,devtype=cdrom" , "format=raw,vdev=hdd,access=ro,devtype=cdrom" ] diff --git a/tests/xlconfigdata/test-disk-positional-parms-partial.xml b/tests/xlconfigdata/test-disk-positional-parms-partial.xml index e86a5be..52b21dc 100644 --- a/tests/xlconfigdata/test-disk-positional-parms-partial.xml +++ b/tests/xlconfigdata/test-disk-positional-parms-partial.xml @@ -39,6 +39,12 @@ + + + + + + -- 2.9.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v2] libxl: report numa sibling distances on host capabilities
From: Wim ten Have When running on a NUMA machine, populate the sibling node and distance information using data supplied by Xen. With locality distances information, under Xen, new host capabilities would like: 263902380 ... ... ... Signed-off-by: Wim ten Have --- src/libxl/libxl_capabilities.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/libxl/libxl_capabilities.c b/src/libxl/libxl_capabilities.c index 839a2ee..e095920 100644 --- a/src/libxl/libxl_capabilities.c +++ b/src/libxl/libxl_capabilities.c @@ -247,8 +247,9 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) { libxl_numainfo *numa_info = NULL; libxl_cputopology *cpu_topo = NULL; -int nr_nodes = 0, nr_cpus = 0; +int nr_nodes = 0, nr_cpus = 0, nr_siblings = 0; virCapsHostNUMACellCPUPtr *cpus = NULL; +virCapsHostNUMACellSiblingInfoPtr siblings = NULL; int *nr_cpus_node = NULL; size_t i; int ret = -1; @@ -322,10 +323,23 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) if (numa_info[i].size == LIBXL_NUMAINFO_INVALID_ENTRY) continue; +nr_siblings = numa_info[i].num_dists; +if (nr_siblings) { +size_t j; + +if (VIR_ALLOC_N(siblings, nr_siblings) < 0) +goto cleanup; + +for (j = 0; j < nr_siblings; j++) { +siblings[j].node = j; +siblings[j].distance = numa_info[i].dists[j]; +} +} + if (virCapabilitiesAddHostNUMACell(caps, i, numa_info[i].size / 1024, nr_cpus_node[i], cpus[i], - 0, NULL, + nr_siblings, siblings, 0, NULL) < 0) { virCapabilitiesClearHostNUMACellCPUTopology(cpus[i], nr_cpus_node[i]); @@ -343,6 +357,7 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) for (i = 0; cpus && i < nr_nodes; i++) VIR_FREE(cpus[i]); virCapabilitiesFreeNUMAInfo(caps); +VIR_FREE(siblings); } VIR_FREE(cpus); -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
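A reconstructed sketch of the sibling-distance reporting this patch adds, as it appears in libvirt host-capabilities XML; the cell layout, memory size, and distance values are illustrative, and only the <distances>/<sibling> structure corresponds to what the code above feeds into virCapabilitiesAddHostNUMACell():

    <topology>
      <cells num='2'>
        <cell id='0'>
          <memory unit='KiB'>263902380</memory>
          <distances>
            <sibling id='0' value='10'/>
            <sibling id='1' value='21'/>
          </distances>
          ...
        </cell>
        ...
      </cells>
    </topology>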
[libvirt] [PATCH v2] libxl: report numa sibling distances on host capabilities
From: Wim ten Have When running on a NUMA machine, populate the sibling node and distance information using data supplied by Xen. Wim ten Have (1): libxl: report numa sibling distances on host capabilities src/libxl/libxl_capabilities.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1] libxl: report numa sibling distances on host capabilities
From: Wim ten Have When running on a NUMA machine, populate the sibling node and distance information using data supplied by Xen. Wim ten Have (1): libxl: report numa sibling distances on host capabilities src/libxl/libxl_capabilities.c | 20 ++-- 1 file changed, 18 insertions(+), 2 deletions(-) -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v1] libxl: report numa sibling distances on host capabilities
From: Wim ten Have When running on a NUMA machine, populate the sibling node and distance information using data supplied by Xen. With locality distances information, under Xen, new host capabilities would like: 263902380 ... ... ... Signed-off-by: Wim ten Have --- src/libxl/libxl_capabilities.c | 20 ++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/libxl/libxl_capabilities.c b/src/libxl/libxl_capabilities.c index 839a2ee..1702ecb 100644 --- a/src/libxl/libxl_capabilities.c +++ b/src/libxl/libxl_capabilities.c @@ -247,8 +247,9 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) { libxl_numainfo *numa_info = NULL; libxl_cputopology *cpu_topo = NULL; -int nr_nodes = 0, nr_cpus = 0; +int nr_nodes = 0, nr_cpus = 0, nr_siblings = 0; virCapsHostNUMACellCPUPtr *cpus = NULL; +virCapsHostNUMACellSiblingInfoPtr siblings = NULL; int *nr_cpus_node = NULL; size_t i; int ret = -1; @@ -322,10 +323,24 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) if (numa_info[i].size == LIBXL_NUMAINFO_INVALID_ENTRY) continue; +#ifdef LIBXL_HAVE_VNUMA +nr_siblings = numa_info[i].num_dists; +if (nr_siblings) { +size_t j; + +if (VIR_ALLOC_N(siblings, nr_siblings) < 0) +goto cleanup; + +for (j = 0; j < nr_siblings; j++) { +siblings[j].node = j; +siblings[j].distance = numa_info[i].dists[j]; +} +} +#endif if (virCapabilitiesAddHostNUMACell(caps, i, numa_info[i].size / 1024, nr_cpus_node[i], cpus[i], - 0, NULL, + nr_siblings, siblings, 0, NULL) < 0) { virCapabilitiesClearHostNUMACellCPUTopology(cpus[i], nr_cpus_node[i]); @@ -343,6 +358,7 @@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) for (i = 0; cpus && i < nr_nodes; i++) VIR_FREE(cpus[i]); virCapabilitiesFreeNUMAInfo(caps); +VIR_FREE(siblings); } VIR_FREE(cpus); -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v4 3/3] xlconfigtest: add tests for 'nestedhvm' support
From: Wim ten Have Testing various configuration schemas targeting postive and negative nestedhvm under libvirt configuration. Mode "host-passthrough" generates nestedhvm=1 in/from xl format where Intel virtualization (VT-x): or AMD virtualization (AMD-V): disables virtualization mode under guest domains. Signed-off-by: Wim ten Have Signed-off-by: Jim Fehlig --- tests/testutilsxen.c | 30 +++ .../test-fullvirt-nestedhvm-disabled.cfg | 26 + .../test-fullvirt-nestedhvm-disabled.xml | 61 ++ tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 + tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 + tests/xlconfigtest.c | 2 + 6 files changed, 204 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml diff --git a/tests/testutilsxen.c b/tests/testutilsxen.c index 122789c..c4fc146 100644 --- a/tests/testutilsxen.c +++ b/tests/testutilsxen.c @@ -6,6 +6,33 @@ #include "testutilsxen.h" #include "domain_conf.h" +static virCPUFeatureDef cpuDefaultFeatures[] = { +{ (char *) "ds",-1 }, +{ (char *) "acpi", -1 }, +{ (char *) "ss",-1 }, +{ (char *) "ht",-1 }, +{ (char *) "tm",-1 }, +{ (char *) "pbe", -1 }, +{ (char *) "ds_cpl",-1 }, +{ (char *) "vmx", -1 }, +{ (char *) "est", -1 }, +{ (char *) "tm2", -1 }, +{ (char *) "cx16", -1 }, +{ (char *) "xtpr", -1 }, +{ (char *) "lahf_lm", -1 }, +}; +static virCPUDef cpuDefaultData = { +.type = VIR_CPU_TYPE_HOST, +.arch = VIR_ARCH_X86_64, +.model = (char *) "core2duo", +.vendor = (char *) "Intel", +.sockets = 1, +.cores = 2, +.threads = 1, +.nfeatures = ARRAY_CARDINALITY(cpuDefaultFeatures), +.nfeatures_max = ARRAY_CARDINALITY(cpuDefaultFeatures), +.features = cpuDefaultFeatures, +}; virCapsPtr testXenCapsInit(void) { @@ -88,6 +115,9 @@ testXLInitCaps(void) if ((caps = virCapabilitiesNew(virArchFromHost(), false, false)) == NULL) return NULL; + +caps->host.cpu = virCPUDefCopy(&cpuDefaultData); + nmachines = ARRAY_CARDINALITY(x86_machines); if ((machines = virCapabilitiesAllocMachines(x86_machines, nmachines)) == NULL) goto cleanup; diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg new file mode 100644 index 000..d4b9f45 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 579 +memory = 394 +vcpus = 1 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +nestedhvm = 0 +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2", "format=qcow2,vdev=hdb,access=rw,backendtype=qdisk,target=/var/lib/libvirt/images/XenGuest2-home", "format=raw,vdev=hdc,access=ro,backendtype=qdisk,devtype=cdrom,target=/root/boot.iso" ] diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml new file mode 100644 index 000..58b6338 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml @@ -0,0 +1,61 @@ + + 
XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 592896 + 403456 + 1 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm.cfg b/tests/xlconfigdata/test-fullvirt-nestedhvm.cfg new file mode 100644 index 000..281f126
[libvirt] [PATCH v4 0/3] libxl: nestedhvm support
From: Wim ten Have This patch enhances host-passthrough capability to advertise the required vendor CPU virtualization feature which will be used to enable 'nestedhvm' in the libxl driver. Wim ten Have (3): libxl: set nestedhvm for mode host-passthrough xenconfig: add conversions for xen-xl xlconfigtest: add tests for 'nestedhvm' support cfg.mk | 4 +- src/libxl/libxl_conf.c | 39 ++- src/libxl/libxl_conf.h | 1 + src/libxl/libxl_domain.c | 2 +- src/xenconfig/xen_xl.c | 76 ++ tests/testutilsxen.c | 30 + .../test-fullvirt-nestedhvm-disabled.cfg | 26 .../test-fullvirt-nestedhvm-disabled.xml | 61 + tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 + tests/xlconfigtest.c | 2 + 11 files changed, 322 insertions(+), 4 deletions(-) create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v4 1/3] libxl: set nestedhvm for mode host-passthrough
From: Wim ten Have Xen feature nestedhvm is the option on Xen 4.4+ which enables nested virtualization when mode host-passthrough is applied. nested HVM is enabled by adding below on the target domain; Virtualization on target domain can be disabled by specifying such under feature policy rule on target name; [On Intel (VT-x) architecture] or: [On AMD (AMD-V) architecture] Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- src/libxl/libxl_conf.c | 39 ++- src/libxl/libxl_conf.h | 1 + src/libxl/libxl_domain.c | 2 +- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index 4bab651..56bc097 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -47,6 +47,7 @@ #include "libxl_utils.h" #include "virstoragefile.h" #include "secret_util.h" +#include "cpu/cpu.h" #define VIR_FROM_THIS VIR_FROM_LIBXL @@ -293,6 +294,7 @@ libxlMakeChrdevStr(virDomainChrDefPtr def, char **buf) static int libxlMakeDomBuildInfo(virDomainDefPtr def, libxl_ctx *ctx, + virCapsPtr caps, libxl_domain_config *d_config) { libxl_domain_build_info *b_info = &d_config->b_info; @@ -374,6 +376,40 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, def->features[VIR_DOMAIN_FEATURE_ACPI] == VIR_TRISTATE_SWITCH_ON); +if (caps && +def->cpu && def->cpu->mode == (VIR_CPU_MODE_HOST_PASSTHROUGH)) { +bool hasHwVirt = false; +bool svm = false, vmx = false; + +if (ARCH_IS_X86(def->os.arch)) { +vmx = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "vmx"); +svm = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "svm"); +hasHwVirt = vmx | svm; +} + +if (def->cpu->nfeatures) { +for (i = 0; i < def->cpu->nfeatures; i++) { + +switch (def->cpu->features[i].policy) { + +case VIR_CPU_FEATURE_DISABLE: +case VIR_CPU_FEATURE_FORBID: +if ((vmx && STREQ(def->cpu->features[i].name, "vmx")) || +(svm && STREQ(def->cpu->features[i].name, "svm"))) +hasHwVirt = false; +break; + +case VIR_CPU_FEATURE_FORCE: +case VIR_CPU_FEATURE_REQUIRE: +case VIR_CPU_FEATURE_OPTIONAL: +case VIR_CPU_FEATURE_LAST: +break; +} +} +} +libxl_defbool_set(&b_info->u.hvm.nested_hvm, hasHwVirt); +} + if (def->nsounds > 0) { /* * Use first sound device. 
man xl.cfg(5) describes soundhw as @@ -2089,6 +2125,7 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, virDomainDefPtr def, const char *channelDir LIBXL_ATTR_UNUSED, libxl_ctx *ctx, + virCapsPtr caps, libxl_domain_config *d_config) { libxl_domain_config_init(d_config); @@ -2096,7 +2133,7 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, if (libxlMakeDomCreateInfo(ctx, def, &d_config->c_info) < 0) return -1; -if (libxlMakeDomBuildInfo(def, ctx, d_config) < 0) +if (libxlMakeDomBuildInfo(def, ctx, caps, d_config) < 0) return -1; if (libxlMakeDiskList(def, d_config) < 0) diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h index c653c9f..264df11 100644 --- a/src/libxl/libxl_conf.h +++ b/src/libxl/libxl_conf.h @@ -217,6 +217,7 @@ libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, virDomainDefPtr def, const char *channelDir LIBXL_ATTR_UNUSED, libxl_ctx *ctx, + virCapsPtr caps, libxl_domain_config *d_config); static inline void diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index ea28c93..256cf1d 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -1256,7 +1256,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, goto cleanup_dom; if (libxlBuildDomainConfig(driver->reservedGraphicsPorts, vm->def, - cfg->channelDir, cfg->ctx, &d_config) < 0) + cfg->channelDir, cfg->ctx, cfg->caps, &d_config) < 0) goto cleanup_dom; if (cfg->autoballoon && libxlDomainFreeMem(cfg->ctx, &d_config) < 0) -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
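A sketch of the domain XML this patch acts on, reconstructed from the logic in libxlMakeDomBuildInfo() above (standard libvirt CPU elements; values illustrative). Nested HVM is enabled by requesting plain host-passthrough:

    <cpu mode='host-passthrough'/>

and disabled again on the same guest by masking the vendor virtualization feature, on Intel (VT-x):

    <cpu mode='host-passthrough'>
      <feature policy='disable' name='vmx'/>
    </cpu>

or on AMD (AMD-V):

    <cpu mode='host-passthrough'>
      <feature policy='disable' name='svm'/>
    </cpu>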
[libvirt] [PATCH v4 2/3] xenconfig: add conversions for xen-xl
From: Wim ten Have Per xen-xl conversions from and to native under host-passthrough mode we take care for Xen (nestedhvm = mode) applied and inherited settings generating or processing correct feature policy: [On Intel (VT-x) architectures] or [On AMD (AMD-V) architectures] It will then generate (or parse) for nestedhvm=1 in/from xl format. Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- cfg.mk | 4 +-- src/xenconfig/xen_xl.c | 76 ++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/cfg.mk b/cfg.mk index 09548fe..3da8332 100644 --- a/cfg.mk +++ b/cfg.mk @@ -768,7 +768,7 @@ sc_prohibit_gettext_markup: # lower-level code must not include higher-level headers. cross_dirs=$(patsubst $(srcdir)/src/%.,%,$(wildcard $(srcdir)/src/*/.)) cross_dirs_re=($(subst / ,/|,$(cross_dirs))) -mid_dirs=access|conf|cpu|locking|logging|network|node_device|rpc|security|storage +mid_dirs=access|conf|cpu|locking|logging|network|node_device|rpc|security|storage|xen sc_prohibit_cross_inclusion: @for dir in $(cross_dirs); do \ case $$dir in \ @@ -777,7 +777,7 @@ sc_prohibit_cross_inclusion: locking/) safe="($$dir|util|conf|rpc)";;\ cpu/| network/| node_device/| rpc/| security/| storage/)\ safe="($$dir|util|conf|storage)";;\ - xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen)";; \ + xenapi/) safe="($$dir|util|conf|xen)";; \ *) safe="($$dir|$(mid_dirs)|util)";;\ esac; \ in_vc_files="^src/$$dir" \ diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index a5b342a..4f24d45 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -34,6 +34,7 @@ #include "virstoragefile.h" #include "xen_xl.h" #include "libxl_capabilities.h" +#include "cpu/cpu.h" #define VIR_FROM_THIS VIR_FROM_XENXL @@ -106,6 +107,7 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) { const char *bios; const char *boot; +int val = 0; if (xenConfigGetString(conf, "bios", &bios, NULL) < 0) return -1; @@ -164,6 +166,52 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) } def->os.nBootDevs++; } + +if (xenConfigGetBool(conf, "nestedhvm", &val, -1) < 0) +return -1; + +if (val == 1) { +virCPUDefPtr cpu; + +if (VIR_ALLOC(cpu) < 0) +return -1; + +cpu->mode = VIR_CPU_MODE_HOST_PASSTHROUGH; +cpu->type = VIR_CPU_TYPE_GUEST; +def->cpu = cpu; +} else if (val == 0) { +const char *vtfeature = NULL; + +if (caps && caps->host.cpu && ARCH_IS_X86(def->os.arch)) { +if (virCPUCheckFeature(caps->host.arch, caps->host.cpu, "vmx")) +vtfeature = "vmx"; +else if (virCPUCheckFeature(caps->host.arch, caps->host.cpu, "svm")) +vtfeature = "svm"; +} + +if (vtfeature) { +virCPUDefPtr cpu; + +if (VIR_ALLOC(cpu) < 0) +return -1; + +if (VIR_ALLOC(cpu->features) < 0) { +VIR_FREE(cpu); +return -1; +} + +if (VIR_STRDUP(cpu->features->name, vtfeature) < 0) { +VIR_FREE(cpu->features); +VIR_FREE(cpu); +return -1; +} +cpu->features->policy = VIR_CPU_FEATURE_DISABLE; +cpu->nfeatures = cpu->nfeatures_max = 1; +cpu->mode = VIR_CPU_MODE_HOST_PASSTHROUGH; +cpu->type = VIR_CPU_TYPE_GUEST; +def->cpu = cpu; +} +} } else { if (xenConfigCopyStringOpt(conf, "bootloader", &def->os.bootloader) < 0) return -1; @@ -899,6 +947,34 @@ xenFormatXLOS(virConfPtr conf, virDomainDefPtr def) if (xenConfigSetString(conf, "boot", boot) < 0) return -1; +if (def->cpu && +def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH) { +bool hasHwVirt = true; + +if (def->cpu->nfeatures) { +for (i = 0; i < def->cpu->nfeat
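As a usage sketch of the conversion paths this patch covers (connection URI and file names are illustrative):

    # xl.cfg with "nestedhvm = 1"  ->  domain XML carrying <cpu mode='host-passthrough'/>
    virsh -c xen:/// domxml-from-native xen-xl /etc/xen/XenGuest2.cfg

    # domain XML with <feature policy='disable' name='vmx'/>  ->  xl.cfg with "nestedhvm = 0"
    virsh -c xen:/// domxml-to-native xen-xl XenGuest2.xml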
Re: [libvirt] [PATCH v3 2/3] xenconfig: add conversions for xen-xl
On Thu, 20 Apr 2017 15:40:22 -0600 Jim Fehlig wrote: > Wim Ten Have wrote: > > From: Wim ten Have > > > > Per xen-xl conversions from and to native under host-passthrough > > mode we take care for Xen (nestedhvm = mode) applied and inherited > > settings generating or processing correct feature policy: > > > > [On Intel (VT-x) architectures] > > > > > > or > > > > [On AMD (AMD-V) architectures] > > > > > > It will then generate (or parse) for nestedhvm=1 in/from xl format. > > > > Signed-off-by: Joao Martins > > Signed-off-by: Wim ten Have > > --- > > cfg.mk | 2 +- > > src/xenconfig/xen_xl.c | 64 > > ++ > > 2 files changed, 65 insertions(+), 1 deletion(-) > > > > diff --git a/cfg.mk b/cfg.mk > > index 69e3f3a..32c3725 100644 > > --- a/cfg.mk > > +++ b/cfg.mk > > @@ -777,7 +777,7 @@ sc_prohibit_cross_inclusion: > > locking/) safe="($$dir|util|conf|rpc)";;\ > > cpu/| network/| node_device/| rpc/| security/| storage/)\ > > safe="($$dir|util|conf|storage)";;\ > > - xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen)";; \ > > + xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen|cpu)";; \ > > It would be nice to get another libvirt dev opinion on this change. As I said > in > the V2 thread, it seems we could remove xenconfig from this check. > > > *) safe="($$dir|$(mid_dirs)|util)";;\ > > E.g. let it be handled in this case. In that case we have to add 'xen' to "mid_dirs" above. --- a/cfg.mk +++ b/cfg.mk @@ -768,7 +768,7 @@ sc_prohibit_gettext_markup: # lower-level code must not include higher-level headers. cross_dirs=$(patsubst $(srcdir)/src/%.,%,$(wildcard $(srcdir)/src/*/.)) cross_dirs_re=($(subst / ,/|,$(cross_dirs))) -mid_dirs=access|conf|cpu|locking|logging|network|node_device|rpc|security|storage +mid_dirs=access|conf|cpu|locking|logging|network|node_device|rpc|security|storage|xen Otherwise there's various other complains. ... sound like this is a bit deserted area. My selection to add cpu under xenapi/|xenconfig/) was to have it at lease minimized to the world of xen arena. > > esac; \ > > in_vc_files="^src/$$dir" \ > > diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c > > index 74f68b3..62af8b8 100644 > > --- a/src/xenconfig/xen_xl.c > > +++ b/src/xenconfig/xen_xl.c > > @@ -34,6 +34,7 @@ > > #include "virstoragefile.h" > > #include "xen_xl.h" > > #include "libxl_capabilities.h" > > +#include "cpu/cpu.h" > > > > #define VIR_FROM_THIS VIR_FROM_XENXL > > > > @@ -106,6 +107,7 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, > > virCapsPtr caps) > > if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) { > > const char *bios; > > const char *boot; > > +int val = 0; > > > > if (xenConfigGetString(conf, "bios", &bios, NULL) < 0) > > return -1; > > @@ -164,6 +166,40 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, > > virCapsPtr caps) > > } > > def->os.nBootDevs++; > > } > > + > > +if (xenConfigGetBool(conf, "nestedhvm", &val, -1) < 0) > > +return -1; > > + > > +if (val != -1) { > > +virCPUDefPtr cpu = NULL; > > + > > +if (VIR_ALLOC(cpu) < 0) > > +return -1; > > + > > +if (val == 0) { > > +bool isVTx = true; > > + > > +if (VIR_ALLOC(cpu->features) < 0) { > > +VIR_FREE(cpu); > > +return -1; > > +} > > + > > +if (caps && caps->host.cpu && ARCH_IS_X86(def->os.arch)) > > +isVTx = virCPUCheckFeature(caps->host.arch, > > caps->host.cpu, "vmx"); > > + > > +if (VIR_STRDUP(cpu->features->name, isVTx ? "vmx" : "svm") > > < 0) { > > +VIR_FREE(cpu->features); > > +VI
Re: [libvirt] [PATCH v3 1/3] libxl: set nestedhvm for mode host-passthrough
On Thu, 20 Apr 2017 14:28:11 -0600 Jim Fehlig wrote: > Wim Ten Have wrote: > > From: Wim ten Have > > > > Xen feature nestedhvm is the option on Xen 4.4+ which enables > > nested virtualization when mode host-passthrough is applied. > > > > nested HVM is enabled by adding below on the target domain; > > > > > > Virtualization on target domain can be disabled by specifying > > such under feature policy rule on target name; > > > > [On Intel (VT-x) architecture] > > > > > > or: > > > > [On AMD (AMD-V) architecture] > > > > > > Signed-off-by: Joao Martins > > Signed-off-by: Wim ten Have > > --- > > src/libxl/libxl_conf.c | 46 > > +- > > src/libxl/libxl_conf.h | 2 +- > > src/libxl/libxl_domain.c | 2 +- > > 3 files changed, 43 insertions(+), 7 deletions(-) > > > > diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c > > index f5b788b..31508d6 100644 > > --- a/src/libxl/libxl_conf.c > > +++ b/src/libxl/libxl_conf.c > > @@ -47,6 +47,7 @@ > > #include "libxl_utils.h" > > #include "virstoragefile.h" > > #include "secret_util.h" > > +#include "cpu/cpu.h" > > > > > > #define VIR_FROM_THIS VIR_FROM_LIBXL > > @@ -292,7 +293,7 @@ libxlMakeChrdevStr(virDomainChrDefPtr def, char **buf) > > > > static int > > libxlMakeDomBuildInfo(virDomainDefPtr def, > > - libxl_ctx *ctx, > > + libxlDriverConfigPtr cfg, > >libxl_domain_config *d_config) > > { > > libxl_domain_build_info *b_info = &d_config->b_info; > > @@ -308,7 +309,7 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, > > libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PV); > > > > b_info->max_vcpus = virDomainDefGetVcpusMax(def); > > -if (libxl_cpu_bitmap_alloc(ctx, &b_info->avail_vcpus, > > b_info->max_vcpus)) > > +if (libxl_cpu_bitmap_alloc(cfg->ctx, &b_info->avail_vcpus, > > b_info->max_vcpus)) > > return -1; > > libxl_bitmap_set_none(&b_info->avail_vcpus); > > for (i = 0; i < virDomainDefGetVcpus(def); i++) > > @@ -374,6 +375,41 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, > >def->features[VIR_DOMAIN_FEATURE_ACPI] == > >VIR_TRISTATE_SWITCH_ON); > > > > +if (cfg && def->cpu && > > + def->cpu->mode == (VIR_CPU_MODE_HOST_PASSTHROUGH)) { > > +bool hasHwVirt = false; > > +bool svm = false, vmx = false; > > +virCapsPtr caps = cfg->caps; > > + > > +if (caps && ARCH_IS_X86(def->os.arch)) { > > +vmx = virCPUCheckFeature(caps->host.arch, caps->host.cpu, > > "vmx"); > > +svm = virCPUCheckFeature(caps->host.arch, caps->host.cpu, > > "svm"); > > +hasHwVirt = vmx | svm; > > +} > > + > > +if (def->cpu->nfeatures) { > > +for (i = 0; i < def->cpu->nfeatures; i++) { > > + > > +switch (def->cpu->features[i].policy) { > > + > > +case VIR_CPU_FEATURE_DISABLE: > > +case VIR_CPU_FEATURE_FORBID: > > +if ((vmx && STREQ(def->cpu->features[i].name, > > "vmx")) || > > +(svm && STREQ(def->cpu->features[i].name, > > "svm"))) > > +hasHwVirt = false; > > +break; > > + > > +case VIR_CPU_FEATURE_FORCE: > > +case VIR_CPU_FEATURE_REQUIRE: > > +case VIR_CPU_FEATURE_OPTIONAL: > > +case VIR_CPU_FEATURE_LAST: > > +break; > > +} > > +} > > +} > > +libxl_defbool_set(&b_info->u.hvm.nested_hvm, hasHwVirt); > > +} > > + > > if (def->nsounds > 0) { > > /* > > * Use first sound device. man xl.cfg(5) describes soundhw as > > @@ -2087,15 +2123,15 @@ int > > libxlBuildDomainConfig(virPortAllocato
[libvirt] [PATCH v3 1/3] libxl: set nestedhvm for mode host-passthrough
From: Wim ten Have Xen feature nestedhvm is the option on Xen 4.4+ which enables nested virtualization when mode host-passthrough is applied. nested HVM is enabled by adding below on the target domain; Virtualization on target domain can be disabled by specifying such under feature policy rule on target name; [On Intel (VT-x) architecture] or: [On AMD (AMD-V) architecture] Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- src/libxl/libxl_conf.c | 46 +- src/libxl/libxl_conf.h | 2 +- src/libxl/libxl_domain.c | 2 +- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index f5b788b..31508d6 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -47,6 +47,7 @@ #include "libxl_utils.h" #include "virstoragefile.h" #include "secret_util.h" +#include "cpu/cpu.h" #define VIR_FROM_THIS VIR_FROM_LIBXL @@ -292,7 +293,7 @@ libxlMakeChrdevStr(virDomainChrDefPtr def, char **buf) static int libxlMakeDomBuildInfo(virDomainDefPtr def, - libxl_ctx *ctx, + libxlDriverConfigPtr cfg, libxl_domain_config *d_config) { libxl_domain_build_info *b_info = &d_config->b_info; @@ -308,7 +309,7 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PV); b_info->max_vcpus = virDomainDefGetVcpusMax(def); -if (libxl_cpu_bitmap_alloc(ctx, &b_info->avail_vcpus, b_info->max_vcpus)) +if (libxl_cpu_bitmap_alloc(cfg->ctx, &b_info->avail_vcpus, b_info->max_vcpus)) return -1; libxl_bitmap_set_none(&b_info->avail_vcpus); for (i = 0; i < virDomainDefGetVcpus(def); i++) @@ -374,6 +375,41 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, def->features[VIR_DOMAIN_FEATURE_ACPI] == VIR_TRISTATE_SWITCH_ON); +if (cfg && def->cpu && + def->cpu->mode == (VIR_CPU_MODE_HOST_PASSTHROUGH)) { +bool hasHwVirt = false; +bool svm = false, vmx = false; +virCapsPtr caps = cfg->caps; + +if (caps && ARCH_IS_X86(def->os.arch)) { +vmx = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "vmx"); +svm = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "svm"); +hasHwVirt = vmx | svm; +} + +if (def->cpu->nfeatures) { +for (i = 0; i < def->cpu->nfeatures; i++) { + +switch (def->cpu->features[i].policy) { + +case VIR_CPU_FEATURE_DISABLE: +case VIR_CPU_FEATURE_FORBID: +if ((vmx && STREQ(def->cpu->features[i].name, "vmx")) || +(svm && STREQ(def->cpu->features[i].name, "svm"))) +hasHwVirt = false; +break; + +case VIR_CPU_FEATURE_FORCE: +case VIR_CPU_FEATURE_REQUIRE: +case VIR_CPU_FEATURE_OPTIONAL: +case VIR_CPU_FEATURE_LAST: +break; +} +} +} +libxl_defbool_set(&b_info->u.hvm.nested_hvm, hasHwVirt); +} + if (def->nsounds > 0) { /* * Use first sound device. 
man xl.cfg(5) describes soundhw as @@ -2087,15 +2123,15 @@ int libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, virDomainDefPtr def, const char *channelDir LIBXL_ATTR_UNUSED, - libxl_ctx *ctx, + libxlDriverConfigPtr cfg, libxl_domain_config *d_config) { libxl_domain_config_init(d_config); -if (libxlMakeDomCreateInfo(ctx, def, &d_config->c_info) < 0) +if (libxlMakeDomCreateInfo(cfg->ctx, def, &d_config->c_info) < 0) return -1; -if (libxlMakeDomBuildInfo(def, ctx, d_config) < 0) +if (libxlMakeDomBuildInfo(def, cfg, d_config) < 0) return -1; if (libxlMakeDiskList(def, d_config) < 0) diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h index c653c9f..7a83669 100644 --- a/src/libxl/libxl_conf.h +++ b/src/libxl/libxl_conf.h @@ -216,7 +216,7 @@ int libxlBuildDomainConfig(virPortAllocatorPtr graphicsports, virDomainDefPtr def, const char *channelDir LIBXL_ATTR_UNUSED, - libxl_ctx *ctx, + libxlDriverConfigPtr cfg, libxl_d
[libvirt] [PATCH v3 2/3] xenconfig: add conversions for xen-xl
From: Wim ten Have Per xen-xl conversions from and to native under host-passthrough mode we take care for Xen (nestedhvm = mode) applied and inherited settings generating or processing correct feature policy: [On Intel (VT-x) architectures] or [On AMD (AMD-V) architectures] It will then generate (or parse) for nestedhvm=1 in/from xl format. Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- cfg.mk | 2 +- src/xenconfig/xen_xl.c | 64 ++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/cfg.mk b/cfg.mk index 69e3f3a..32c3725 100644 --- a/cfg.mk +++ b/cfg.mk @@ -777,7 +777,7 @@ sc_prohibit_cross_inclusion: locking/) safe="($$dir|util|conf|rpc)";;\ cpu/| network/| node_device/| rpc/| security/| storage/)\ safe="($$dir|util|conf|storage)";;\ - xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen)";; \ + xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen|cpu)";; \ *) safe="($$dir|$(mid_dirs)|util)";;\ esac; \ in_vc_files="^src/$$dir" \ diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index 74f68b3..62af8b8 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -34,6 +34,7 @@ #include "virstoragefile.h" #include "xen_xl.h" #include "libxl_capabilities.h" +#include "cpu/cpu.h" #define VIR_FROM_THIS VIR_FROM_XENXL @@ -106,6 +107,7 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) { const char *bios; const char *boot; +int val = 0; if (xenConfigGetString(conf, "bios", &bios, NULL) < 0) return -1; @@ -164,6 +166,40 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) } def->os.nBootDevs++; } + +if (xenConfigGetBool(conf, "nestedhvm", &val, -1) < 0) +return -1; + +if (val != -1) { +virCPUDefPtr cpu = NULL; + +if (VIR_ALLOC(cpu) < 0) +return -1; + +if (val == 0) { +bool isVTx = true; + +if (VIR_ALLOC(cpu->features) < 0) { +VIR_FREE(cpu); +return -1; +} + +if (caps && caps->host.cpu && ARCH_IS_X86(def->os.arch)) +isVTx = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "vmx"); + +if (VIR_STRDUP(cpu->features->name, isVTx ? "vmx" : "svm") < 0) { +VIR_FREE(cpu->features); +VIR_FREE(cpu); +return -1; +} +cpu->features->policy = VIR_CPU_FEATURE_DISABLE; +cpu->nfeatures = cpu->nfeatures_max = 1; +} +cpu->mode = VIR_CPU_MODE_HOST_PASSTHROUGH; +cpu->type = VIR_CPU_TYPE_GUEST; +def->cpu = cpu; +} + } else { if (xenConfigCopyStringOpt(conf, "bootloader", &def->os.bootloader) < 0) return -1; @@ -897,6 +933,34 @@ xenFormatXLOS(virConfPtr conf, virDomainDefPtr def) if (xenConfigSetString(conf, "boot", boot) < 0) return -1; +if (def->cpu && +def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH) { +bool hasHwVirt = true; + +if (def->cpu->nfeatures) { +for (i = 0; i < def->cpu->nfeatures; i++) { + +switch (def->cpu->features[i].policy) { +case VIR_CPU_FEATURE_DISABLE: +case VIR_CPU_FEATURE_FORBID: +if (STREQ(def->cpu->features[i].name, "vmx") || +STREQ(def->cpu->features[i].name, "svm")) +hasHwVirt = false; +break; + +case VIR_CPU_FEATURE_FORCE: +case VIR_CPU_FEATURE_REQUIRE: +case VIR_CPU_FEATURE_OPTIONAL: +case VIR_CPU_FEATURE_LAST: +break; +} +} +} + +if (xenConfigSetInt(conf, "nestedhvm", hasHwVirt) < 0) +return -1; +} + /* XXX floppy disks */ } else { if (def->os.bootloader && -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v3 0/3] libxl: nestedhvm support
From: Wim ten Have This patch enhances host-passthrough capability to advertise the required vendor CPU virtualization feature which will be used to enable 'nestedhvm' in the libxl driver. Wim ten Have (3): libxl: set nestedhvm for mode host-passthrough xenconfig: add conversions for xen-xl xlconfigtest: add tests for 'nestedhvm' support cfg.mk | 2 +- src/libxl/libxl_conf.c | 46 ++-- src/libxl/libxl_conf.h | 2 +- src/libxl/libxl_domain.c | 2 +- src/xenconfig/xen_xl.c | 64 ++ .../test-fullvirt-nestedhvm-disabled.cfg | 26 + .../test-fullvirt-nestedhvm-disabled.xml | 61 + tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 + tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 tests/xlconfigtest.c | 2 + 10 files changed, 282 insertions(+), 8 deletions(-) create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH v3 3/3] xlconfigtest: add tests for 'nestedhvm' support
From: Wim ten Have Testing various configuration schemas targeting postive and negative nestedhvm under libvirt configuration. Mode "host-passthrough" generates nestedhvm=1 in/from xl format where Intel virtualization (VT-x): or AMD virtualization (AMD-V): disables virtualization mode under guest domains. Signed-off-by: Wim ten Have --- .../test-fullvirt-nestedhvm-disabled.cfg | 26 + .../test-fullvirt-nestedhvm-disabled.xml | 61 ++ tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 + tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 + tests/xlconfigtest.c | 2 + 5 files changed, 174 insertions(+) create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg new file mode 100644 index 000..d4b9f45 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 579 +memory = 394 +vcpus = 1 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +nestedhvm = 0 +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2", "format=qcow2,vdev=hdb,access=rw,backendtype=qdisk,target=/var/lib/libvirt/images/XenGuest2-home", "format=raw,vdev=hdc,access=ro,backendtype=qdisk,devtype=cdrom,target=/root/boot.iso" ] diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml new file mode 100644 index 000..58b6338 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml @@ -0,0 +1,61 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 592896 + 403456 + 1 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm.cfg b/tests/xlconfigdata/test-fullvirt-nestedhvm.cfg new file mode 100644 index 000..281f126 --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm.cfg @@ -0,0 +1,26 @@ +name = "XenGuest2" +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" +maxmem = 579 +memory = 394 +vcpus = 1 +pae = 1 +acpi = 1 +apic = 1 +viridian = 0 +rtc_timeoffset = 0 +localtime = 0 +on_poweroff = "destroy" +on_reboot = "restart" +on_crash = "restart" +device_model = "/usr/lib/xen/bin/qemu-system-i386" +sdl = 0 +vnc = 1 +vncunused = 1 +vnclisten = "127.0.0.1" +vif = [ "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] +parallel = "none" +serial = "none" +builder = "hvm" +boot = "d" +nestedhvm = 1 +disk = [ "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2", "format=qcow2,vdev=hdb,access=rw,backendtype=qdisk,target=/var/lib/libvirt/images/XenGuest2-home", "format=raw,vdev=hdc,access=ro,backendtype=qdisk,devtype=cdrom,target=/root/boot.iso" ] diff --git 
a/tests/xlconfigdata/test-fullvirt-nestedhvm.xml b/tests/xlconfigdata/test-fullvirt-nestedhvm.xml new file mode 100644 index 000..8c02e7a --- /dev/null +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm.xml @@ -0,0 +1,59 @@ + + XenGuest2 + c7a5fdb2-cdaf-9455-926a-d65c16db1809 + 592896 + 403456 + 1 + +hvm +/usr/lib/xen/boot/hvmloader + + + + + + + + + + destroy + restart + restart + +/usr/lib/xen/bin/qemu-system-i386 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xlconfigtest.c b/tests/x
Re: [libvirt] [PATCH v2 3/3] xlconfigtest: add tests for 'nestedhvm' support
On Mon, 17 Apr 2017 14:25:13 -0600 Jim Fehlig wrote: > On 03/24/2017 03:02 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Testing various configuration schemas targeting postive, negative > > and undefined nestedhvm under libvirt > > configuration. > > > > Mode "host-passthrough" generates nestedhvm=1 in/from xl format where > > > > Intel virtualization (VT-x): > > > > > > or > > > > AMD virtualization (AMD-V): > > > > > > disables virtualization mode under guest domains. > > > > Signed-off-by: Wim ten Have > > --- > > .../test-fullvirt-nestedhvm-disabled.cfg | 26 + > > .../test-fullvirt-nestedhvm-disabled.xml | 62 > > ++ > > .../test-fullvirt-nestedhvm-undefined.cfg | 25 + > > .../test-fullvirt-nestedhvm-undefined.xml | 58 > > > > The 'undefined' case is already tested by all the other tests that don't > contain > an explicit 'nestedhvm='. IMO it can be removed. O:-) ... will remove! Regards, - Wim. > > tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 + > > tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 > > > > tests/xlconfigtest.c | 3 ++ > > 7 files changed, 259 insertions(+) > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-undefined.cfg > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-undefined.xml > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg > > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml > > > > diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg > > b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg > > new file mode 100644 > > index 000..d4b9f45 > > --- /dev/null > > +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg > > @@ -0,0 +1,26 @@ > > +name = "XenGuest2" > > +uuid = "c7a5fdb2-cdaf-9455-926a-d65c16db1809" > > +maxmem = 579 > > +memory = 394 > > +vcpus = 1 > > +pae = 1 > > +acpi = 1 > > +apic = 1 > > +viridian = 0 > > +rtc_timeoffset = 0 > > +localtime = 0 > > +on_poweroff = "destroy" > > +on_reboot = "restart" > > +on_crash = "restart" > > +device_model = "/usr/lib/xen/bin/qemu-system-i386" > > +sdl = 0 > > +vnc = 1 > > +vncunused = 1 > > +vnclisten = "127.0.0.1" > > +vif = [ > > "mac=00:16:3e:66:92:9c,bridge=xenbr1,script=vif-bridge,model=e1000" ] > > +parallel = "none" > > +serial = "none" > > +builder = "hvm" > > +boot = "d" > > +nestedhvm = 0 > > +disk = [ > > "format=raw,vdev=hda,access=rw,backendtype=phy,target=/dev/HostVG/XenGuest2", > > > > "format=qcow2,vdev=hdb,access=rw,backendtype=qdisk,target=/var/lib/libvirt/images/XenGuest2-home", > > > > "format=raw,vdev=hdc,access=ro,backendtype=qdisk,devtype=cdrom,target=/root/boot.iso" > > ] > > diff --git a/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml > > b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml > > new file mode 100644 > > index 000..a5b9233 > > --- /dev/null > > +++ b/tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml > > @@ -0,0 +1,62 @@ > > + > > + XenGuest2 > > + c7a5fdb2-cdaf-9455-926a-d65c16db1809 > > + 592896 > > + 403456 > > + 1 > > + > > +hvm > > +/usr/lib/xen/boot/hvmloader > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + destroy > > + restart > > + restart > > + > > +/usr/lib/xen/bin/qemu-system-i386 > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > + > > +
Re: [libvirt] [PATCH v2 2/3] xenconfig: add conversions for xen-xl
On Mon, 17 Apr 2017 14:22:20 -0600 Jim Fehlig wrote: > On 03/24/2017 03:02 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Per xen-xl conversions from and to native under host-passthrough > > mode we take care for Xen (nestedhvm = mode) applied and inherited > > settings generating or processing correct feature policy: > > > > [On Intel (VT-x) architectures] > > > > > > or > > > > [On AMD (AMD-V) architectures] > > > > > > It will then generate (or parse) for nestedhvm=1 in/from xl format. > > > > Signed-off-by: Joao Martins > > Signed-off-by: Wim ten Have > > --- > > src/xenconfig/xen_xl.c | 67 > > ++ > > 1 file changed, 67 insertions(+) > > > > diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c > > index 74f68b3..d3797b8 100644 > > --- a/src/xenconfig/xen_xl.c > > +++ b/src/xenconfig/xen_xl.c > > @@ -106,6 +106,7 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, > > virCapsPtr caps) > > if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) { > > const char *bios; > > const char *boot; > > +int val = 0; > > > > if (xenConfigGetString(conf, "bios", &bios, NULL) < 0) > > return -1; > > @@ -164,6 +165,47 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, > > virCapsPtr caps) > > } > > def->os.nBootDevs++; > > } > > + > > +if (xenConfigGetBool(conf, "nestedhvm", &val, -1) < 0) > > +return -1; > > + > > +if (val != -1) { > > +virCPUDefPtr cpu = NULL; > > + > > +if (VIR_ALLOC(cpu) < 0) > > +return -1; > > + > > +if (val == 0) { > > +if (VIR_ALLOC_N(cpu->features, 2) < 0) > > +goto cleanup; > > + > > +/* > > + * Below is pointless in real world but for purpose > > + * of testing let's check features depth holding at > > + * least multiple elements and also check if both > > + * vmx|svm are understood. > > + */ > > +cpu->features[0].policy = VIR_CPU_FEATURE_DISABLE; > > +if (VIR_STRDUP(cpu->features[0].name, "vmx") < 0) > > +goto cleanup; > > +cpu->features[1].policy = VIR_CPU_FEATURE_DISABLE; > > +if (VIR_STRDUP(cpu->features[1].name, "svm") < 0) > > +goto cleanup; > > Since caps is passed to this function, can it be used to determine whether to > disable vmx or svm? Yes :-) ... thanks for enlightening me here as that is actually the correct approach if non-test domain (conversion) actions are made under libvirtd. There's one minor question here per me. To process caps for vmx|svm I'll bring in virCPUCheckFeature() which at its turn requires me to include "cpu/cpu.h" for its prototype. Unfortunate cfg.mk does not anticipate and raises a syntax-check caveat. prohibit_cross_inclusion src/xenconfig/xen_xl.c:51:#include "cpu/cpu.h" maint.mk: unsafe cross-directory include cfg.mk:773: recipe for target 'sc_prohibit_cross_inclusion' failed make: *** [sc_prohibit_cross_inclusion] Error 1 That is ... unless I apply below change to cfg.mk. - xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen)";; \ + xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen|cpu)";; \ Is that lethal to do? I tried to reason and fail to see why not, ie. i am a bit clueless for its specific reason ... but also new to whole arena. > > + > > +cpu->nfeatures = cpu->nfeatures_max = 2; > > +} > > +cpu->mode = VIR_CPU_MODE_HOST_PASSTHROUGH; > > +cpu->type = VIR_CPU_TYPE_GUEST; > > +def->cpu = cpu; > > +cpu = NULL; > > + cleanup: > > +if (cpu) { > > +VIR_FREE(cpu->features); > > +VIR_FREE(cpu); > > +return -1; > > +} > > I'm not fond of the cleanup label in the middle of this function. If we can > disable only one of vmx or svm, then there will be one less strdup and one > less > cleanup spot. 
Cleanup can then occur at the point of error.
Re: [libvirt] [PATCH v2 1/3] libxl: set nestedhvm for mode host-passthrough
On Mon, 17 Apr 2017 12:04:53 -0600 Jim Fehlig wrote: > On 03/24/2017 03:02 PM, Wim Ten Have wrote: > > From: Wim ten Have > > > > Xen feature nestedhvm is the option on Xen 4.4+ which enables > > nested virtualization when mode host-passthrough is applied. > > > > Virtualization on target domain can be disabled by specifying > > such under feature policy rule on target name; > > > > [On Intel (VT-x) architecture] > > > > > > or: > > > > [On AMD (AMD-V) architecture] > > > > I think we should also give an example of enabling nested HVM. E.g. > > Agree, will add. > > Signed-off-by: Joao Martins > > Signed-off-by: Wim ten Have > > --- > > src/libxl/libxl_conf.c | 47 > > ++- > > src/libxl/libxl_conf.h | 2 +- > > src/libxl/libxl_domain.c | 2 +- > > 3 files changed, 44 insertions(+), 7 deletions(-) > > > > diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c > > index f5b788b..ede7c8a 100644 > > --- a/src/libxl/libxl_conf.c > > +++ b/src/libxl/libxl_conf.c > > @@ -47,6 +47,7 @@ > > #include "libxl_utils.h" > > #include "virstoragefile.h" > > #include "secret_util.h" > > +#include "cpu/cpu.h" > > > > > > #define VIR_FROM_THIS VIR_FROM_LIBXL > > @@ -292,7 +293,7 @@ libxlMakeChrdevStr(virDomainChrDefPtr def, char **buf) > > > > static int > > libxlMakeDomBuildInfo(virDomainDefPtr def, > > - libxl_ctx *ctx, > > + libxlDriverConfigPtr cfg, > >libxl_domain_config *d_config) > > { > > libxl_domain_build_info *b_info = &d_config->b_info; > > @@ -308,7 +309,7 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, > > libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PV); > > > > b_info->max_vcpus = virDomainDefGetVcpusMax(def); > > -if (libxl_cpu_bitmap_alloc(ctx, &b_info->avail_vcpus, > > b_info->max_vcpus)) > > +if (libxl_cpu_bitmap_alloc(cfg->ctx, &b_info->avail_vcpus, > > b_info->max_vcpus)) > > return -1; > > libxl_bitmap_set_none(&b_info->avail_vcpus); > > for (i = 0; i < virDomainDefGetVcpus(def); i++) > > @@ -374,6 +375,42 @@ libxlMakeDomBuildInfo(virDomainDefPtr def, > >def->features[VIR_DOMAIN_FEATURE_ACPI] == > >VIR_TRISTATE_SWITCH_ON); > > > > +if (cfg && def->cpu && > > + def->cpu->mode == (VIR_CPU_MODE_HOST_PASSTHROUGH)) { > > +bool hasHwVirt = false; > > +bool svm = false, vmx = false; > > +virCapsPtr caps = cfg->caps; > > + > > +if (caps && ARCH_IS_X86(def->os.arch)) { > > +virCPUDefPtr cpuDef = caps->host.cpu; > > I don't see much value in having this local variable. Indeed redundant (will remove). > > + > > +vmx = virCPUCheckFeature( > > +cfg->caps->host.arch, cpuDef, "vmx"); > > +svm = virCPUCheckFeature( > > +cfg->caps->host.arch, cpuDef, "svm"); > > +hasHwVirt = (vmx | svm); > > +} > > > And you already have a local 'caps' for cfg->caps. So this could be shortened > a > bit to > > vmx = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "vmx"); > svm = virCPUCheckFeature(caps->host.arch, caps->host.cpu, "svm"); > hasHwVirt = vmx | svm; Agree. > > + > > +if (def->cpu->nfeatures) { > > +for (i = 0; i < def->cpu->nfeatures; i++) { > > + > > +switch (def->cpu->features[i].policy) { > > + > > +case VIR_CPU_FEATURE_DISABLE: > > +case VIR_CPU_FEATURE_FORBID: > > +if ((vmx && STREQ(def->cpu->features[i].name, > > "vmx")) || > > +(svm && STREQ(def->cpu->features[i].name, > > "svm"))) > > +hasHwVirt = false; > > +break; > > + > > +default: > > +break; > > Generally libvir
Re: [libvirt] [PATCH v2 0/3] libxl: nestedhvm support
Kind reminder, was this PATCH received and is it under review? % Date: Fri, 24 Mar 2017 22:02:33 +0100 % Subject: [PATCH v2 0/3] libxl: nestedhvm support % Subject: [PATCH v2 1/3] libxl: set nestedhvm for mode host-passthrough % Subject: [PATCH v2 2/3] xenconfig: add conversions for xen-xl % Subject: [PATCH v2 3/3] xlconfigtest: add tests for 'nestedhvm' support Regards, - Wim. On Fri, 24 Mar 2017 22:02:33 +0100 Wim Ten Have wrote: > From: Wim ten Have > > This patch enhances host-passthrough capability to advertise the > required vendor CPU virtualization feature which will be used to > enable 'nestedhvm' in the libxl driver. > > Wim ten Have (3): > libxl: set nestedhvm for mode host-passthrough > xenconfig: add conversions for xen-xl > xlconfigtest: add tests for 'nestedhvm' support > > src/libxl/libxl_conf.c | 47 +-- > src/libxl/libxl_conf.h | 2 +- > src/libxl/libxl_domain.c | 2 +- > src/xenconfig/xen_xl.c | 67 > ++ > .../test-fullvirt-nestedhvm-disabled.cfg | 26 + > .../test-fullvirt-nestedhvm-disabled.xml | 62 > .../test-fullvirt-nestedhvm-undefined.cfg | 25 > .../test-fullvirt-nestedhvm-undefined.xml | 58 +++ > tests/xlconfigdata/test-fullvirt-nestedhvm.cfg | 26 + > tests/xlconfigdata/test-fullvirt-nestedhvm.xml | 59 +++ > tests/xlconfigtest.c | 3 + > 11 files changed, 370 insertions(+), 7 deletions(-) > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.cfg > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-disabled.xml > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-undefined.cfg > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm-undefined.xml > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.cfg > create mode 100644 tests/xlconfigdata/test-fullvirt-nestedhvm.xml > -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] virConfSaveValue: protect against a NULL pointer reference
On Thu, 13 Apr 2017 11:12:36 -0600 Jim Fehlig wrote: > Wim Ten Have wrote: > > From: Wim ten Have > > Sorry for the delay. I've not had time to work on upstream libvirt activities > recently... Understood. There is one more PATCH out per me. Let me send a reminder under its specific cover letter. > > Fix xlconfigtest runs build for --enable-test-oom on > > Xen XL-2-XML Parse channel-pty > > > > Program received signal SIGSEGV, Segmentation fault. > > .. > > Odd that we've not stumbled across this earlier. But ACK to making this code a > bit more resilient. Patch has been pushed now. > > BTW, I noticed a few more OOM test failures in the channel formating code. > Patch > sent. Can you help review it? Sure, i actually had that one in my list to come along soon. > https://www.redhat.com/archives/libvir-list/2017-April/msg00740.html This marks correct fix! Pls go forward. Regards, - Wim. -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virConfSaveValue: protect against a NULL pointer reference
From: Wim ten Have This patch protects against a NULL pointer dereference leading to a SEGV under xlconfigtest per DO_TEST("channel-pty"); VIR_TEST_OOM=1 VIR_TEST_RANGE=29 ./tests/xlconfigtest Wim ten Have (1): virConfSaveValue: protect against a NULL pointer reference src/util/virconf.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virConfSaveValue: protect against a NULL pointer reference
From: Wim ten Have Fix xlconfigtest runs build for --enable-test-oom on Xen XL-2-XML Parse channel-pty Program received signal SIGSEGV, Segmentation fault. #0 0x73c2b373 in __strchr_sse2 () from /lib64/libc.so.6 ==> #1 0x77875701 in virConfSaveValue (buf=buf@entry=0x7fffd8a0, val=val@entry=0x674750) at util/virconf.c:290 #2 0x77875668 in virConfSaveValue (buf=buf@entry=0x7fffd8a0, val=) at util/virconf.c:306 #3 0x778757ef in virConfSaveEntry (buf=buf@entry=0x7fffd8a0, cur=cur@entry=0x674780) at util/virconf.c:338 #4 0x778783eb in virConfWriteMem (memory=0x665570 "", len=len@entry=0x7fffd910, conf=conf@entry=0x65b940) at util/virconf.c:1543 #5 0x0040eccb in testCompareParseXML (replaceVars=, xml=, xlcfg=0x662c00 "/home/wtenhave/WORK/libvirt/OOMtesting/libvirt-devel/tests/xlconfigdata/test-channel-pty.cfg") at xlconfigtest.c:108 #6 testCompareHelper (data=) at xlconfigtest.c:205 #7 0x00410b3a in virTestRun (title=title@entry=0x432cc0 "Xen XL-2-XML Parse channel-pty", body=body@entry=0x40e9b0 , data=data@entry=0x7fffd9f0) at testutils.c:247 #8 0x0040f322 in mymain () at xlconfigtest.c:278 #9 0x00411410 in virTestMain (argc=1, argv=0x7fffdba8, func=0x40f660 ) at testutils.c:992 #10 0x73bc0401 in __libc_start_main () from /lib64/libc.so.6 #11 0x0040e86a in _start () (gdb) frame 1 #1 0x77875701 in virConfSaveValue (buf=buf@entry=0x7fffd8a0, val=val@entry=0x674750) at util/virconf.c:290 290 if (strchr(val->str, '\n') != NULL) { (gdb) print *val $1 = {type = VIR_CONF_STRING, next = 0x0, l = 0, str = 0x0, list = 0x0} Signed-off-by: Wim ten Have --- src/util/virconf.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/util/virconf.c b/src/util/virconf.c index a85a307..9840ca6 100644 --- a/src/util/virconf.c +++ b/src/util/virconf.c @@ -287,14 +287,16 @@ virConfSaveValue(virBufferPtr buf, virConfValuePtr val) virBufferAsprintf(buf, "%llu", val->l); break; case VIR_CONF_STRING: -if (strchr(val->str, '\n') != NULL) { -virBufferAsprintf(buf, "\"\"\"%s\"\"\"", val->str); -} else if (strchr(val->str, '"') == NULL) { -virBufferAsprintf(buf, "\"%s\"", val->str); -} else if (strchr(val->str, '\'') == NULL) { -virBufferAsprintf(buf, "'%s'", val->str); -} else { -virBufferAsprintf(buf, "\"\"\"%s\"\"\"", val->str); +if (val->str) { +if (strchr(val->str, '\n') != NULL) { +virBufferAsprintf(buf, "\"\"\"%s\"\"\"", val->str); +} else if (strchr(val->str, '"') == NULL) { +virBufferAsprintf(buf, "\"%s\"", val->str); +} else if (strchr(val->str, '\'') == NULL) { +virBufferAsprintf(buf, "'%s'", val->str); +} else { +virBufferAsprintf(buf, "\"\"\"%s\"\"\"", val->str); +} } break; case VIR_CONF_LIST: { -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] xenFormatXLDomainDisks: avoid double free on OOM testing
From: Wim ten Have This patch fixes a double free exposed by OOM testing of xlconfigtest, per DO_TEST("new-disk"); VIR_TEST_OOM=1 VIR_TEST_RANGE=5 ./tests/xlconfigtest Wim ten Have (1): xenFormatXLDomainDisks: avoid double free on OOM testing src/xenconfig/xen_xl.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] xenFormatXLDomainDisks: avoid double free on OOM testing
From: Wim ten Have Fix xlconfigtest runs build for --enable-test-oom on Xen XL-2-XML Parse new-disk #0 0x73bd791f in raise () from /lib64/libc.so.6 #1 0x73bd951a in abort () from /lib64/libc.so.6 #2 0x73c1b200 in __libc_message () from /lib64/libc.so.6 #3 0x73c2488a in _int_free () from /lib64/libc.so.6 #4 0x73c282bc in free () from /lib64/libc.so.6 #5 0x77864fcb in virFree (ptrptr=ptrptr@entry=0x7fffd868) at util/viralloc.c:582 #6 0x778776e5 in virConfFreeValue (val=) at util/virconf.c:178 ==> #7 0x00425759 in xenFormatXLDomainDisks (def=0x7fffd8c0, def=0x7fffd8c0, conf=0x658220) at xenconfig/xen_xl.c:1159 #8 xenFormatXL (def=def@entry=0x66ec20, conn=conn@entry=0x668cf0) at xenconfig/xen_xl.c:1558 #9 0x0040ea1d in testCompareParseXML (replaceVars=, xml=0x65f5e0 "/home/wtenhave/WORK/libvirt/history/libvirt/tests/xlconfigdata/test-fullvirt-ovmf.xml", xlcfg=0x65f6b0 "/home/wtenhave/WORK/libvirt/history/libvirt/tests/xlconfigdata/test-fullvirt-ovmf.cfg") at xlconfigtest.c:105 #10 testCompareHelper (data=) at xlconfigtest.c:205 #11 0x0041079a in virTestRun (title=title@entry=0x431cf0 "Xen XL-2-XML Parse fullvirt-ovmf", body=body@entry=0x40e720 , data=data@entry=0x7fffda50) at testutils.c:247 #12 0x0040ebc2 in mymain () at xlconfigtest.c:256 #13 0x00411070 in virTestMain (argc=1, argv=0x7fffdc08, func=0x40f2c0 ) at testutils.c:992 #14 0x73bc2401 in __libc_start_main () from /lib64/libc.so.6 #15 0x0040e5da in _start () symmetry seems missing its sibbling coded functionality demonstrated under functions; xenFormatXLUSBController() xenFormatXLUSB() xenFormatXLDomainChannels() xenFormatXMDisks Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index 74f68b3..43ecf41 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -1132,7 +1132,6 @@ xenFormatXLDisk(virConfValuePtr list, virDomainDiskDefPtr disk) static int xenFormatXLDomainDisks(virConfPtr conf, virDomainDefPtr def) { -int ret = -1; virConfValuePtr diskVal; size_t i; @@ -1150,15 +1149,19 @@ xenFormatXLDomainDisks(virConfPtr conf, virDomainDefPtr def) goto cleanup; } -if (diskVal->list != NULL) -if (virConfSetValue(conf, "disk", diskVal) == 0) -diskVal = NULL; +if (diskVal->list != NULL) { +int ret = virConfSetValue(conf, "disk", diskVal); +diskVal = NULL; +if (ret < 0) +return -1; +} +VIR_FREE(diskVal); -ret = 0; +return 0; cleanup: virConfFreeValue(diskVal); -return ret; +return -1; } -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
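The ownership rule behind this change, as read from the code above rather than from any documented contract: virConfSetValue() consumes the value it is handed, on failure as well as on success, so once it has been called the caller must drop its own pointer and must not let an error path reach virConfFreeValue() on the same object — that second free is the abort shown in the backtrace. A minimal sketch of the corrected tail of the function, mirroring the hunk above:

    if (diskVal->list != NULL) {
        int ret = virConfSetValue(conf, "disk", diskVal);   /* consumes diskVal */
        diskVal = NULL;                      /* never free it ourselves after this */
        if (ret < 0)
            return -1;
    }
    VIR_FREE(diskVal);   /* frees the unused value when no disks were formatted; no-op otherwise */
    return 0;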
[libvirt] [PATCH v2 2/3] xenconfig: add conversions for xen-xl
From: Wim ten Have Per xen-xl conversions from and to native under host-passthrough mode we take care for Xen (nestedhvm = mode) applied and inherited settings generating or processing correct feature policy: [On Intel (VT-x) architectures] or [On AMD (AMD-V) architectures] It will then generate (or parse) for nestedhvm=1 in/from xl format. Signed-off-by: Joao Martins Signed-off-by: Wim ten Have --- src/xenconfig/xen_xl.c | 67 ++ 1 file changed, 67 insertions(+) diff --git a/src/xenconfig/xen_xl.c b/src/xenconfig/xen_xl.c index 74f68b3..d3797b8 100644 --- a/src/xenconfig/xen_xl.c +++ b/src/xenconfig/xen_xl.c @@ -106,6 +106,7 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) { const char *bios; const char *boot; +int val = 0; if (xenConfigGetString(conf, "bios", &bios, NULL) < 0) return -1; @@ -164,6 +165,47 @@ xenParseXLOS(virConfPtr conf, virDomainDefPtr def, virCapsPtr caps) } def->os.nBootDevs++; } + +if (xenConfigGetBool(conf, "nestedhvm", &val, -1) < 0) +return -1; + +if (val != -1) { +virCPUDefPtr cpu = NULL; + +if (VIR_ALLOC(cpu) < 0) +return -1; + +if (val == 0) { +if (VIR_ALLOC_N(cpu->features, 2) < 0) +goto cleanup; + +/* + * Below is pointless in real world but for purpose + * of testing let's check features depth holding at + * least multiple elements and also check if both + * vmx|svm are understood. + */ +cpu->features[0].policy = VIR_CPU_FEATURE_DISABLE; +if (VIR_STRDUP(cpu->features[0].name, "vmx") < 0) +goto cleanup; +cpu->features[1].policy = VIR_CPU_FEATURE_DISABLE; +if (VIR_STRDUP(cpu->features[1].name, "svm") < 0) +goto cleanup; + +cpu->nfeatures = cpu->nfeatures_max = 2; +} +cpu->mode = VIR_CPU_MODE_HOST_PASSTHROUGH; +cpu->type = VIR_CPU_TYPE_GUEST; +def->cpu = cpu; +cpu = NULL; + cleanup: +if (cpu) { +VIR_FREE(cpu->features); +VIR_FREE(cpu); +return -1; +} +} + } else { if (xenConfigCopyStringOpt(conf, "bootloader", &def->os.bootloader) < 0) return -1; @@ -897,6 +939,31 @@ xenFormatXLOS(virConfPtr conf, virDomainDefPtr def) if (xenConfigSetString(conf, "boot", boot) < 0) return -1; +if (def->cpu && +def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH) { +bool hasHwVirt = true; + +if (def->cpu->nfeatures) { +for (i = 0; i < def->cpu->nfeatures; i++) { + +switch (def->cpu->features[i].policy) { +case VIR_CPU_FEATURE_DISABLE: +case VIR_CPU_FEATURE_FORBID: +if (STREQ(def->cpu->features[i].name, "vmx") || +STREQ(def->cpu->features[i].name, "svm")) +hasHwVirt = false; +break; + +default: +break; +} +} +} + +if (xenConfigSetInt(conf, "nestedhvm", hasHwVirt) < 0) +return -1; +} + /* XXX floppy disks */ } else { if (def->os.bootloader && -- 2.9.3 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list