When convert-in-place=true, the shared memory allocated/provided by the guest-memfd memory backend should also be used internally for private memory. Do this by dup()'ing the guest_memfd FD so that the shared and private FDs each keep their own cleanup path, exactly as they do today for convert-in-place=false (where shared memory comes from some other backend, such as memory-backend-memfd).
Since it only currently makes sense to allow a memory-backend-guest-memfd FD to be used for private memory, introduce a new RAM_GUEST_MEMFD_SHARED flag that can be used to limit dup()'ing to specific backend types like memory-backend-guest-memfd. Signed-off-by: Michael Roth <[email protected]> --- backends/hostmem-guest-memfd.c | 1 + include/system/memory.h | 3 +++ system/physmem.c | 46 +++++++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/backends/hostmem-guest-memfd.c b/backends/hostmem-guest-memfd.c index deb796a6bd..8ab8242892 100644 --- a/backends/hostmem-guest-memfd.c +++ b/backends/hostmem-guest-memfd.c @@ -56,6 +56,7 @@ have_fd: ram_flags = backend->share ? RAM_SHARED : RAM_PRIVATE; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + ram_flags |= RAM_GUEST_MEMFD_SHARED; return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); } diff --git a/include/system/memory.h b/include/system/memory.h index 24c68720aa..0a371b686a 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -282,6 +282,9 @@ typedef struct IOMMUTLBEvent { */ #define RAM_PRIVATE (1 << 13) +/* RAM can be shared that has kvm guest memfd backend */ +#define RAM_GUEST_MEMFD_SHARED (1 << 14) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, diff --git a/system/physmem.c b/system/physmem.c index 04c7c38721..ebec7ae7a4 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -59,6 +59,7 @@ #include "system/hostmem.h" #include "system/hw_accel.h" #include "system/xen-mapcache.h" +#include "system/confidential-guest-support.h" #include "trace.h" #ifdef CONFIG_FALLOCATE_PUNCH_HOLE @@ -2187,11 +2188,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) if (new_block->flags & RAM_GUEST_MEMFD) { int ret; + assert(current_machine->cgs); + if 
(!kvm_enabled()) { error_setg(errp, "cannot set up private guest memory for %s: KVM required", object_get_typename(OBJECT(current_machine->cgs))); goto out_free; } + assert(new_block->guest_memfd < 0); ret = ram_block_coordinated_discard_require(true); @@ -2202,8 +2206,38 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) goto out_free; } - new_block->guest_memfd = kvm_create_guest_memfd_private(new_block->max_length, - errp); + /* + * If both shared/private memory are handled by guest_memfd, make sure to + * re-use the guest_memfd inode that should have already been created for + * handling shared memory. + */ + if (current_machine->cgs->convert_in_place) { + if (!(new_block->flags & RAM_GUEST_MEMFD_SHARED)) { + error_setg(errp, "configured memory backend is not compatible with in-place conversion"); + qemu_mutex_unlock_ramlist(); + goto out_free; + } + assert(new_block->fd >= 0); + + /* + * Current logic calculates guest_memfd_offset on the assumption that + * offset 0 corresponds to the first GPA that is backed by the RAM + * block/backend. For cases where the guest_memfd is only used for + * private memory and created internally as-needed this is always the + * case, but when re-using a guest_memfd that's also usable for shared + * memory (e.g. via memory-backend-guest-memfd) it's possible that + * guest_memfd might be mmap()'d starting at some non-zero offset. For + * now, this isn't a reachable condition, but assert this in case this + * ever changes and the logic needs to be updated to account for this. 
+ */ + assert(new_block->fd_offset == 0); + + new_block->guest_memfd = qemu_dup(new_block->fd); + } else { + new_block->guest_memfd = + kvm_create_guest_memfd_private(new_block->max_length, errp); + } + if (new_block->guest_memfd < 0) { qemu_mutex_unlock_ramlist(); goto out_free; @@ -2315,7 +2349,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size, assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | RAM_READONLY_FD | RAM_GUEST_MEMFD | - RAM_RESIZEABLE)) == 0); + RAM_RESIZEABLE | RAM_GUEST_MEMFD_SHARED)) == 0); assert(max_size >= size); if (xen_enabled()) { @@ -2828,6 +2862,12 @@ int ram_block_rebind(Error **errp) { RAMBlock *block; + if (current_machine->cgs && current_machine->cgs->convert_in_place) { + error_setg(errp, + "reset support is not yet enabled for in-place conversion"); + return -1; + } + qemu_mutex_lock_ramlist(); RAMBLOCK_FOREACH(block) { -- 2.43.0
