On 2014/12/17 14:02, haifeng....@huawei.com wrote: > From: linhaifeng <haifeng....@huawei.com> > > If we create VM with two or more numa nodes qemu will create two > or more hugepage files but qemu only send one hugepage file fd > to vhost-user when VM's memory size is 2G and with two numa nodes. > > Signed-off-by: linhaifeng <haifeng....@huawei.com> > --- > hw/virtio/vhost-user.c | 78 > ++++++++++++++++++++++++++++++--------------- > hw/virtio/vhost.c | 13 ++++++++ > linux-headers/linux/vhost.h | 7 ++++ > 3 files changed, 73 insertions(+), 25 deletions(-) > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > index aefe0bb..439cbba 100644 > --- a/hw/virtio/vhost-user.c > +++ b/hw/virtio/vhost-user.c > @@ -24,6 +24,10 @@ > #include <linux/vhost.h> > > #define VHOST_MEMORY_MAX_NREGIONS 8 > +/* FIXME: same as the max number of numa node?*/ > +#define HUGEPAGE_MAX_FILES 8 > + > +#define RAM_SHARED (1 << 1) > > typedef enum VhostUserRequest { > VHOST_USER_NONE = 0, > @@ -41,14 +45,15 @@ typedef enum VhostUserRequest { > VHOST_USER_SET_VRING_KICK = 12, > VHOST_USER_SET_VRING_CALL = 13, > VHOST_USER_SET_VRING_ERR = 14, > - VHOST_USER_MAX > + VHOST_USER_MMAP_HUGEPAGE_FILE = 15, > + VHOST_USER_UNMAP_HUGEPAGE_FILE = 16, > + VHOST_USER_MAX, > } VhostUserRequest; > > typedef struct VhostUserMemoryRegion { > uint64_t guest_phys_addr; > uint64_t memory_size; > uint64_t userspace_addr; > - uint64_t mmap_offset; > } VhostUserMemoryRegion; > > typedef struct VhostUserMemory { > @@ -57,6 +62,16 @@ typedef struct VhostUserMemory { > VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > } VhostUserMemory; > > +typedef struct HugepageMemoryInfo { > + uint64_t base_addr; > + uint64_t size; > +}HugeMemInfo; > + > +typedef struct HugepageInfo { > + uint32_t num; > + HugeMemInfo files[HUGEPAGE_MAX_FILES]; > +}HugepageInfo; > + > typedef struct VhostUserMsg { > VhostUserRequest request; > > @@ -71,6 +86,7 @@ typedef struct VhostUserMsg { > struct vhost_vring_state state; 
> struct vhost_vring_addr addr; > VhostUserMemory memory; > + HugepageInfo huge_info; > }; > } QEMU_PACKED VhostUserMsg; > > @@ -104,7 +120,9 @@ static unsigned long int > ioctl_to_vhost_user_request[VHOST_USER_MAX] = { > VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ > VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ > VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ > - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ > + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */ > + VHOST_MMAP_HUGEPAGE_FILE, /* VHOST_USER_MMAP_HUGEPAGE_FILE */ > + VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */ > }; > > static VhostUserRequest vhost_user_request_translate(unsigned long int > request) > @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, > unsigned long int request, > int fds[VHOST_MEMORY_MAX_NREGIONS]; > int i, fd; > size_t fd_num = 0; > + RAMBlock *block; > > assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); > > @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, > unsigned long int request, > case VHOST_RESET_OWNER: > break; > > - case VHOST_SET_MEM_TABLE: > - for (i = 0; i < dev->mem->nregions; ++i) { > - struct vhost_memory_region *reg = dev->mem->regions + i; > - ram_addr_t ram_addr; > + case VHOST_MMAP_HUGEPAGE_FILE: > + qemu_mutex_lock_ramlist(); > > - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); > - qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, > &ram_addr); > - fd = qemu_get_ram_fd(ram_addr); > - if (fd > 0) { > - msg.memory.regions[fd_num].userspace_addr = > reg->userspace_addr; > - msg.memory.regions[fd_num].memory_size = reg->memory_size; > - msg.memory.regions[fd_num].guest_phys_addr = > reg->guest_phys_addr; > - msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr > - > - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); > - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); > - fds[fd_num++] = fd; > + /* Get hugepage file 
informations */ > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + if (block->flags & RAM_SHARED && block->fd > 0) { > + msg.huge_info.files[fd_num].size = block->length; > + msg.huge_info.files[fd_num].base_addr = block->host; > + fds[fd_num++] = block->fd; > } > } > + msg.huge_info.num = fd_num; > > - msg.memory.nregions = fd_num; > + /* Calculate msg size */ > + msg.size = sizeof(m.huge_info.num); > + msg.size += fd_num * sizeof(HugeMemInfo); > + > + qemu_mutex_unlock_ramlist(); > + break; > > - if (!fd_num) { > - error_report("Failed initializing vhost-user memory map\n" > - "consider using -object memory-backend-file share=on\n"); > - return -1; > + case VHOST_UNMAP_HUGEPAGE_FILE: > + /* Tell vhost-user to unmap all hugepage files. */ > + break; > + > + case VHOST_SET_MEM_TABLE: > + for (i = 0; i < dev->mem->nregions; i++) { > + struct vhost_memory_region *reg = dev->mem->regions + i; > + > + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); > + > + msg.memory.regions[i].userspace_addr = reg->userspace_addr; > + msg.memory.regions[i].memory_size = reg->memory_size; > + msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr; > + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); > } > > + msg.memory.nregions = i; > msg.size = sizeof(m.memory.nregions); > msg.size += sizeof(m.memory.padding); > - msg.size += fd_num * sizeof(VhostUserMemoryRegion); > - > + msg.size += i * sizeof(VhostUserMemoryRegion); > break; > > case VHOST_SET_LOG_FD: > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 5a12861..b8eb341 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, > VirtIODevice *vdev) > if (r < 0) { > goto fail_features; > } > + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { > + r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, > + NULL); > + if (r < 0) { > + r = -errno; > + goto fail_mem; > + } > + } > r = 
hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); > if (r < 0) { > r = -errno; > @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, > VirtIODevice *vdev) > g_free(hdev->log); > hdev->log = NULL; > hdev->log_size = 0; > + > + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { > + (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
This should be VHOST_UNMAP_HUGEPAGE_FILE, not VHOST_MMAP_HUGEPAGE_FILE: in vhost_dev_stop() you want to tell the vhost-user backend to unmap the hugepage files; as written, stopping the device re-sends the mmap request and the backend's mappings are never released. > + NULL); > + } > } > > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h > index c656f61..bb72811 100644 > --- a/linux-headers/linux/vhost.h > +++ b/linux-headers/linux/vhost.h > @@ -113,6 +113,13 @@ struct vhost_memory { > /* Set eventfd to signal an error */ > #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) > > +/* Tell vhost-user to mmap hugepage file */ > +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int) > +/* Tell vhost-user to unmap hugepage file */ > +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int) > + > +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread) > + > /* VHOST_NET specific defines */ > > /* Attach virtio net ring to a raw socket, or tap device. > -- Regards, Haifeng