There is a lot of documentation in the comments about how PPC64 handles
VFIO passthrough devices when calculating memLockLimit, and more will be
added with the PPC64 NVLink2 support code.
Let's remove the PPC64 code from the body of qemuDomainGetMemLockLimitBytes
and put it into a helper function. This simplifies the flow of
qemuDomainGetMemLockLimitBytes, which handles all other platforms, and
improves the readability of the PPC64 specifics.

Suggested-by: Erik Skultety <eskul...@redhat.com>
Signed-off-by: Daniel Henrique Barboza <danielhb...@gmail.com>
---
 src/qemu/qemu_domain.c | 169 ++++++++++++++++++++++-------------------
 1 file changed, 91 insertions(+), 78 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 099097fe62..77548c224c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -10343,6 +10343,95 @@ qemuDomainUpdateCurrentMemorySize(virDomainObjPtr vm)
 }
 
 
+/**
+ * getPPC64MemLockLimitBytes:
+ * @def: domain definition
+ *
+ * A PPC64 helper that calculates the memory locking limit in order for
+ * the guest to operate properly.
+ */
+static unsigned long long
+getPPC64MemLockLimitBytes(virDomainDefPtr def)
+{
+    unsigned long long memKB = 0;
+    unsigned long long baseLimit, memory, maxMemory;
+    unsigned long long passthroughLimit = 0;
+    size_t i, nPCIHostBridges = 0;
+    bool usesVFIO = false;
+
+    for (i = 0; i < def->ncontrollers; i++) {
+        virDomainControllerDefPtr cont = def->controllers[i];
+
+        if (!virDomainControllerIsPSeriesPHB(cont))
+            continue;
+
+        nPCIHostBridges++;
+    }
+
+    for (i = 0; i < def->nhostdevs; i++) {
+        virDomainHostdevDefPtr dev = def->hostdevs[i];
+
+        if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+            dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
+            dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+            usesVFIO = true;
+            break;
+        }
+    }
+
+    memory = virDomainDefGetMemoryTotal(def);
+
+    if (def->mem.max_memory)
+        maxMemory = def->mem.max_memory;
+    else
+        maxMemory = memory;
+
+    /* baseLimit := maxMemory / 128                                  (a)
+     *              + 4 MiB * #PHBs + 8 MiB                          (b)
+     *
+     * (a) is the hash table
+     *
+     * (b) is accounting for the 32-bit DMA window - it could be either the
+     *     KVM accelerated TCE tables for emulated devices, or the VFIO
+     *     userspace view. The 4 MiB per-PHB (including the default one) covers
+     *     a 2GiB DMA window: default is 1GiB, but it's possible it'll be
+     *     increased to help performance. The 8 MiB extra should be plenty for
+     *     the TCE table index for any reasonable number of PHBs and several
+     *     spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
+    baseLimit = maxMemory / 128 +
+                4096 * nPCIHostBridges +
+                8192;
+
+    /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
+     *                          memory                               (d)
+     *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
+     *
+     * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
+     *     rather than 1 GiB
+     *
+     * (d) is the with-DDW (and memory pre-registration and related
+     *     features) DMA window accounting - assuming that we only account RAM
+     *     once, even if mapped to multiple PHBs
+     *
+     * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
+     *     window. This is based a bit on expected guest behaviour, but there
+     *     really isn't a way to completely avoid that. We assume the guest
+     *     requests a 64-bit DMA window (per PHB) just big enough to map all
+     *     its RAM. 4 kiB page size gives the 1/512; it will be less with 64
+     *     kiB pages, less still if the guest is mapped with hugepages (unlike
+     *     the default 32-bit DMA window, DDW windows can use large IOMMU
+     *     pages). 8 MiB is for second and further level overheads, like (b) */
+    if (usesVFIO)
+        passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
+                               memory +
+                               memory / 512 * nPCIHostBridges + 8192);
+
+    memKB = baseLimit + passthroughLimit;
+
+    return memKB << 10;
+}
+
+
 /**
  * qemuDomainGetMemLockLimitBytes:
  * @def: domain definition
@@ -10374,84 +10463,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
     if (def->mem.locked)
         return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
 
-    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
-        unsigned long long maxMemory;
-        unsigned long long memory;
-        unsigned long long baseLimit;
-        unsigned long long passthroughLimit = 0;
-        size_t nPCIHostBridges = 0;
-        bool usesVFIO = false;
-
-        for (i = 0; i < def->ncontrollers; i++) {
-            virDomainControllerDefPtr cont = def->controllers[i];
-
-            if (!virDomainControllerIsPSeriesPHB(cont))
-                continue;
-
-            nPCIHostBridges++;
-        }
-
-        for (i = 0; i < def->nhostdevs; i++) {
-            virDomainHostdevDefPtr dev = def->hostdevs[i];
-
-            if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
-                dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
-                dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
-                usesVFIO = true;
-                break;
-            }
-        }
-
-        memory = virDomainDefGetMemoryTotal(def);
-
-        if (def->mem.max_memory)
-            maxMemory = def->mem.max_memory;
-        else
-            maxMemory = memory;
-
-        /* baseLimit := maxMemory / 128                                  (a)
-         *              + 4 MiB * #PHBs + 8 MiB                          (b)
-         *
-         * (a) is the hash table
-         *
-         * (b) is accounting for the 32-bit DMA window - it could be either the
-         *     KVM accelerated TCE tables for emulated devices, or the VFIO
-         *     userspace view. The 4 MiB per-PHB (including the default one) covers
-         *     a 2GiB DMA window: default is 1GiB, but it's possible it'll be
-         *     increased to help performance. The 8 MiB extra should be plenty for
-         *     the TCE table index for any reasonable number of PHBs and several
-         *     spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
-        baseLimit = maxMemory / 128 +
-                    4096 * nPCIHostBridges +
-                    8192;
-
-        /* passthroughLimit := max( 2 GiB * #PHBs,                       (c)
-         *                          memory                               (d)
-         *                          + memory * 1/512 * #PHBs + 8 MiB )   (e)
-         *
-         * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
-         *     rather than 1 GiB
-         *
-         * (d) is the with-DDW (and memory pre-registration and related
-         *     features) DMA window accounting - assuming that we only account RAM
-         *     once, even if mapped to multiple PHBs
-         *
-         * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
-         *     window. This is based a bit on expected guest behaviour, but there
-         *     really isn't a way to completely avoid that. We assume the guest
-         *     requests a 64-bit DMA window (per PHB) just big enough to map all
-         *     its RAM. 4 kiB page size gives the 1/512; it will be less with 64
-         *     kiB pages, less still if the guest is mapped with hugepages (unlike
-         *     the default 32-bit DMA window, DDW windows can use large IOMMU
-         *     pages). 8 MiB is for second and further level overheads, like (b) */
-        if (usesVFIO)
-            passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
-                                   memory +
-                                   memory / 512 * nPCIHostBridges + 8192);
-
-        memKB = baseLimit + passthroughLimit;
-        goto done;
-    }
+    if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
+        return getPPC64MemLockLimitBytes(def);
 
     /* For device passthrough using VFIO the guest memory and MMIO memory
      * regions need to be locked persistent in order to allow DMA.
-- 
2.20.1
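
As a sanity check on the baseLimit/passthroughLimit arithmetic above, here is
a minimal standalone sketch that reproduces the calculation for a hypothetical
16 GiB guest with one PHB and a VFIO hostdev. It is not part of the patch: the
example values and the local MAX macro are illustrative assumptions only.

    #include <stdio.h>

    /* Local stand-in for libvirt's MAX macro, for this sketch only */
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* Hypothetical guest: 16 GiB of RAM (values kept in KiB, as in
         * the helper), one PHB, at least one VFIO hostdev */
        unsigned long long memory = 16ULL * 1024 * 1024;  /* 16 GiB in KiB */
        unsigned long long maxMemory = memory;            /* no max_memory set */
        unsigned long long nPCIHostBridges = 1;

        /* baseLimit := maxMemory / 128 + 4 MiB * #PHBs + 8 MiB */
        unsigned long long baseLimit = maxMemory / 128 +
                                       4096 * nPCIHostBridges +
                                       8192;          /* 143360 KiB, ~140 MiB */

        /* passthroughLimit := max(2 GiB * #PHBs,
         *                         memory + memory / 512 * #PHBs + 8 MiB) */
        unsigned long long passthroughLimit =
            MAX(2ULL * 1024 * 1024 * nPCIHostBridges,
                memory + memory / 512 * nPCIHostBridges + 8192);
                                                      /* 16818176 KiB */

        /* The helper returns bytes, hence the << 10 shift from KiB */
        printf("memLockLimit = %llu bytes\n",
               (baseLimit + passthroughLimit) << 10); /* 17368612864 */
        return 0;
    }

Running this shows that the VFIO passthrough term dominates: the guest needs
slightly more than its full RAM size locked (the with-DDW case (d) + (e)),
plus the ~140 MiB base overhead from the hash table and 32-bit DMA windows.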