From: linhaifeng <haifeng....@huawei.com> If we create a VM with two or more NUMA nodes, QEMU creates two or more hugepage files, but QEMU only sends one hugepage file fd to vhost-user when the VM's memory size is 2G and it has two NUMA nodes.
Signed-off-by: linhaifeng <haifeng....@huawei.com> --- hw/virtio/vhost-user.c | 78 ++++++++++++++++++++++++++++++--------------- hw/virtio/vhost.c | 13 ++++++++ linux-headers/linux/vhost.h | 7 ++++ 3 files changed, 73 insertions(+), 25 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index aefe0bb..439cbba 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -24,6 +24,10 @@ #include <linux/vhost.h> #define VHOST_MEMORY_MAX_NREGIONS 8 +/* FIXME: same as the max number of numa node?*/ +#define HUGEPAGE_MAX_FILES 8 + +#define RAM_SHARED (1 << 1) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -41,14 +45,15 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_KICK = 12, VHOST_USER_SET_VRING_CALL = 13, VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_MAX + VHOST_USER_MMAP_HUGEPAGE_FILE = 15, + VHOST_USER_UNMAP_HUGEPAGE_FILE = 16, + VHOST_USER_MAX, } VhostUserRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; uint64_t memory_size; uint64_t userspace_addr; - uint64_t mmap_offset; } VhostUserMemoryRegion; typedef struct VhostUserMemory { @@ -57,6 +62,16 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct HugepageMemoryInfo { + uint64_t base_addr; + uint64_t size; +}HugeMemInfo; + +typedef struct HugepageInfo { + uint32_t num; + HugeMemInfo files[HUGEPAGE_MAX_FILES]; +}HugepageInfo; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -71,6 +86,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + HugepageInfo huge_info; }; } QEMU_PACKED VhostUserMsg; @@ -104,7 +120,9 @@ static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ + 
VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */ + VHOST_MMAP_HUGEPAGE_FILE, /* VHOST_USER_MMAP_HUGEPAGE_FILE */ + VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */ }; static VhostUserRequest vhost_user_request_translate(unsigned long int request) @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; + RAMBlock *block; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, case VHOST_RESET_OWNER: break; - case VHOST_SET_MEM_TABLE: - for (i = 0; i < dev->mem->nregions; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - ram_addr_t ram_addr; + case VHOST_MMAP_HUGEPAGE_FILE: + qemu_mutex_lock_ramlist(); - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); - fd = qemu_get_ram_fd(ram_addr); - if (fd > 0) { - msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; - msg.memory.regions[fd_num].memory_size = reg->memory_size; - msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; - msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); - fds[fd_num++] = fd; + /* Get hugepage file informations */ + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + if (block->flags & RAM_SHARED && block->fd > 0) { + msg.huge_info.files[fd_num].size = block->length; + msg.huge_info.files[fd_num].base_addr = block->host; + fds[fd_num++] = block->fd; } } + msg.huge_info.num = fd_num; - msg.memory.nregions = fd_num; + /* Calculate msg size */ + msg.size = sizeof(m.huge_info.num); + msg.size += fd_num * sizeof(HugeMemInfo); + + qemu_mutex_unlock_ramlist(); + break; - if (!fd_num) { - error_report("Failed initializing vhost-user memory 
map\n" - "consider using -object memory-backend-file share=on\n"); - return -1; + case VHOST_UNMAP_HUGEPAGE_FILE: + /* Tell vhost-user to unmap all hugepage files. */ + break; + + case VHOST_SET_MEM_TABLE: + for (i = 0; i < dev->mem->nregions; i++) { + struct vhost_memory_region *reg = dev->mem->regions + i; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + + msg.memory.regions[i].userspace_addr = reg->userspace_addr; + msg.memory.regions[i].memory_size = reg->memory_size; + msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr; + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); } + msg.memory.nregions = i; msg.size = sizeof(m.memory.nregions); msg.size += sizeof(m.memory.padding); - msg.size += fd_num * sizeof(VhostUserMemoryRegion); - + msg.size += i * sizeof(VhostUserMemoryRegion); break; case VHOST_SET_LOG_FD: diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 5a12861..b8eb341 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, + NULL); + if (r < 0) { + r = -errno; + goto fail_mem; + } + } r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); if (r < 0) { r = -errno; @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) g_free(hdev->log); hdev->log = NULL; hdev->log_size = 0; + + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, + NULL); + } } diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c656f61..bb72811 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -113,6 +113,13 @@ struct vhost_memory { /* Set eventfd to signal an error */ #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct 
vhost_vring_file) +/* Tell vhost-user to mmap hugepage file */ +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int) +/* Tell vhost-user to unmap hugepage file */ +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int) + +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread) + /* VHOST_NET specific defines */ /* Attach virtio net ring to a raw socket, or tap device. -- 1.9.0