On Fri, Mar 22, 2024 at 07:11:09PM +0100, Paolo Bonzini wrote: > From: Michael Roth <michael.r...@amd.com>
This should be: From: Xiaoyao Li <xiaoyao...@intel.com> Looks like the author got reset in my tree for some reason and I failed to notice it before posting. Sorry for the mix-up. -Mike > > Add KVM guest_memfd support to RAMBlock so both normal hva based memory > and kvm guest memfd based private memory can be associated in one RAMBlock. > > Introduce a new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to > create private guest_memfd during RAMBlock setup. > > Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd > is more flexible and extensible than simply relying on the VM type because > in the future we may have the case that not all the memory of a VM needs > guest memfd. As a benefit, it also avoids getting MachineState in the memory > subsystem. > > Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of > confidential guests, such as TDX VM. How and when to set it for memory > backends will be implemented in the following patches. > > Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has > KVM guest_memfd allocated. 
> > Signed-off-by: Xiaoyao Li <xiaoyao...@intel.com> > Reviewed-by: David Hildenbrand <da...@redhat.com> > Message-ID: <20240320083945.991426-7-michael.r...@amd.com> > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > include/exec/memory.h | 20 +++++++++++++++++--- > include/exec/ram_addr.h | 2 +- > include/exec/ramblock.h | 1 + > include/sysemu/kvm.h | 3 ++- > accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ > accel/stubs/kvm-stub.c | 5 +++++ > system/memory.c | 5 +++++ > system/physmem.c | 34 +++++++++++++++++++++++++++++++--- > 8 files changed, 90 insertions(+), 8 deletions(-) > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 8626a355b31..679a8476852 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { > /* RAM FD is opened read-only */ > #define RAM_READONLY_FD (1 << 11) > > +/* RAM can be private that has kvm guest memfd backend */ > +#define RAM_GUEST_MEMFD (1 << 12) > + > static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, > IOMMUNotifierFlag flags, > hwaddr start, hwaddr end, > @@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, > * @name: Region name, becomes part of RAMBlock name used in migration stream > * must be unique within any device > * @size: size of the region. > - * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. > + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, > + * RAM_GUEST_MEMFD. > * @errp: pointer to Error*, to store an error if it happens. > * > * Note that this function does not do anything to cause the data in the > @@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, > * (getpagesize()) will be used. > * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, > * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, > - * RAM_READONLY_FD > + * RAM_READONLY_FD, RAM_GUEST_MEMFD > * @path: the path in which to allocate the RAM. > * @offset: offset within the file referenced by path > * @errp: pointer to Error*, to store an error if it happens. > @@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, > * @size: size of the region. > * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, > * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, > - * RAM_READONLY_FD > + * RAM_READONLY_FD, RAM_GUEST_MEMFD > * @fd: the fd to mmap. > * @offset: offset within the file referenced by fd > * @errp: pointer to Error*, to store an error if it happens. > @@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion > *mr) > */ > bool memory_region_is_protected(MemoryRegion *mr); > > +/** > + * memory_region_has_guest_memfd: check whether a memory region has > guest_memfd > + * associated > + * > + * Returns %true if a memory region's ram_block has valid guest_memfd > assigned. > + * > + * @mr: the memory region being queried > + */ > +bool memory_region_has_guest_memfd(MemoryRegion *mr); > + > /** > * memory_region_get_iommu: check whether a memory region is an iommu > * > diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h > index de45ba7bc96..07c8f863750 100644 > --- a/include/exec/ram_addr.h > +++ b/include/exec/ram_addr.h > @@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); > * @mr: the memory region where the ram block is > * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, > * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, > - * RAM_READONLY_FD > + * RAM_READONLY_FD, RAM_GUEST_MEMFD > * @mem_path or @fd: specify the backing file or device > * @offset: Offset into target file > * @errp: pointer to Error*, to store an error if it happens > diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h > index 848915ea5bf..459c8917de2 100644 > --- a/include/exec/ramblock.h > +++ b/include/exec/ramblock.h > @@ -41,6 +41,7 @@ struct RAMBlock { > QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; > int fd; > uint64_t fd_offset; > + int guest_memfd; > size_t page_size; > /* dirty bitmap used during migration */ > unsigned long *bmap; > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index bda309d5ffa..2cb31925091 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -537,7 +537,8 @@ void kvm_mark_guest_state_protected(void); > */ > bool kvm_hwpoisoned_mem(void); > > +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); > + > int kvm_set_memory_attributes_private(hwaddr start, hwaddr size); > int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size); > - > #endif > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index 36e39fd6514..6aa0608805b 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; > static int kvm_sstep_flags; > static bool kvm_immediate_exit; > static uint64_t kvm_supported_memory_attributes; > +static bool kvm_guest_memfd_supported; > static hwaddr kvm_max_slot_size = ~0; > > static const KVMCapabilityInfo kvm_required_capabilites[] = { > @@ -2413,6 +2414,11 @@ static int kvm_init(MachineState *ms) > } > > kvm_supported_memory_attributes = kvm_check_extension(s, > KVM_CAP_MEMORY_ATTRIBUTES); > + kvm_guest_memfd_supported = > + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && > + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && > + 
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); > + > kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); > s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); > > @@ -4131,3 +4137,25 @@ void kvm_mark_guest_state_protected(void) > { > kvm_state->guest_state_protected = true; > } > + > +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) > +{ > + int fd; > + struct kvm_create_guest_memfd guest_memfd = { > + .size = size, > + .flags = flags, > + }; > + > + if (!kvm_guest_memfd_supported) { > + error_setg(errp, "KVM doesn't support guest memfd\n"); > + return -1; > + } > + > + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); > + if (fd < 0) { > + error_setg_errno(errp, errno, "Error creating kvm guest memfd"); > + return -1; > + } > + > + return fd; > +} > diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c > index ca381728840..8e0eb22e61c 100644 > --- a/accel/stubs/kvm-stub.c > +++ b/accel/stubs/kvm-stub.c > @@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) > { > return false; > } > + > +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) > +{ > + return -ENOSYS; > +} > diff --git a/system/memory.c b/system/memory.c > index a229a79988f..c756950c0c0 100644 > --- a/system/memory.c > +++ b/system/memory.c > @@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) > return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); > } > > +bool memory_region_has_guest_memfd(MemoryRegion *mr) > +{ > + return mr->ram_block && mr->ram_block->guest_memfd >= 0; > +} > + > uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) > { > uint8_t mask = mr->dirty_log_mask; > diff --git a/system/physmem.c b/system/physmem.c > index a4fe3d2bf89..f5dfa20e57e 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error > **errp) > const bool shared = qemu_ram_is_shared(new_block); > RAMBlock 
*block; > RAMBlock *last_block = NULL; > + bool free_on_error = false; > ram_addr_t old_ram_size, new_ram_size; > Error *err = NULL; > > @@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error > **errp) > return; > } > memory_try_enable_merging(new_block->host, > new_block->max_length); > + free_on_error = true; > + } > + } > + > + if (new_block->flags & RAM_GUEST_MEMFD) { > + assert(kvm_enabled()); > + assert(new_block->guest_memfd < 0); > + > + new_block->guest_memfd = > kvm_create_guest_memfd(new_block->max_length, > + 0, errp); > + if (new_block->guest_memfd < 0) { > + qemu_mutex_unlock_ramlist(); > + goto out_free; > } > } > > @@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error > **errp) > ram_block_notify_add(new_block->host, new_block->used_length, > new_block->max_length); > } > + return; > + > +out_free: > + if (free_on_error) { > + qemu_anon_ram_free(new_block->host, new_block->max_length); > + new_block->host = NULL; > + } > } > > #ifdef CONFIG_POSIX > @@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, > MemoryRegion *mr, > /* Just support these ram flags by now. 
*/ > assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | > RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | > - RAM_READONLY_FD)) == 0); > + RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); > > if (xen_enabled()) { > error_setg(errp, "-mem-path not supported with Xen"); > @@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, > MemoryRegion *mr, > new_block->used_length = size; > new_block->max_length = size; > new_block->flags = ram_flags; > + new_block->guest_memfd = -1; > new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, > errp); > if (!new_block->host) { > @@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, > ram_addr_t max_size, > int align; > > assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | > - RAM_NORESERVE)) == 0); > + RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); > assert(!host ^ (ram_flags & RAM_PREALLOC)); > > align = qemu_real_host_page_size(); > @@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, > ram_addr_t max_size, > new_block->max_length = max_size; > assert(max_size >= size); > new_block->fd = -1; > + new_block->guest_memfd = -1; > new_block->page_size = qemu_real_host_page_size(); > new_block->host = host; > new_block->flags = ram_flags; > @@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void > *host, > RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, > MemoryRegion *mr, Error **errp) > { > - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); > + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == > 0); > return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, > errp); > } > > @@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) > } else { > qemu_anon_ram_free(block->host, block->max_length); > } > + > + if (block->guest_memfd >= 0) { > + close(block->guest_memfd); > + } > + > g_free(block); > } > > -- > 2.44.0 >