Hi, Michael S. Tsirkin,

The vhost-user device will not work if there are two NUMA nodes in the VM.
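With two nodes the guest's memory is split across two hugepage files (one memory-backend-file per node), so the backend has to receive and mmap every file descriptor, not just the first one. For illustration, here is a minimal sketch of what the receiving side of the new VHOST_USER_MMAP_HUGEPAGE_FILE message from the patch below might look like; handle_mmap_hugepage_file and its fds argument are hypothetical (the descriptors are assumed to have already been read from the message's SCM_RIGHTS ancillary data), and VhostUserMsg/HUGEPAGE_MAX_FILES are the definitions added by the patch:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Hypothetical backend-side handler for VHOST_USER_MMAP_HUGEPAGE_FILE.
     * 'msg' is the decoded VhostUserMsg; 'fds' holds the
     * msg->huge_info.num descriptors received with the message. */
    static void *mapped[HUGEPAGE_MAX_FILES];

    static int handle_mmap_hugepage_file(VhostUserMsg *msg, int *fds)
    {
        uint32_t i;

        for (i = 0; i < msg->huge_info.num; i++) {
            /* Map each hugepage file in full; guest addresses are later
             * translated against huge_info.files[i].base_addr. */
            mapped[i] = mmap(NULL, msg->huge_info.files[i].size,
                             PROT_READ | PROT_WRITE, MAP_SHARED,
                             fds[i], 0);
            if (mapped[i] == MAP_FAILED) {
                return -1;
            }
            close(fds[i]);  /* the mapping keeps the hugepage file alive */
        }
        return 0;
    }

Today only one fd reaches the backend, so guest addresses that fall into the second node's hugepage file can never be translated.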
Should we fix this bug or ignore it?

On 2014/12/18 13:06, Linhaifeng wrote:
>
>
> On 2014/12/17 14:02, haifeng....@huawei.com wrote:
>> From: linhaifeng <haifeng....@huawei.com>
>>
>> If we create a VM with two or more NUMA nodes, QEMU will create two
>> or more hugepage files, but QEMU only sends one hugepage file fd to
>> vhost-user when the VM's memory size is 2G and it has two NUMA nodes.
>>
>> Signed-off-by: linhaifeng <haifeng....@huawei.com>
>> ---
>>  hw/virtio/vhost-user.c      | 78 ++++++++++++++++++++++++++++++---------------
>>  hw/virtio/vhost.c           | 13 ++++++++
>>  linux-headers/linux/vhost.h |  7 ++++
>>  3 files changed, 73 insertions(+), 25 deletions(-)
>>
>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> index aefe0bb..439cbba 100644
>> --- a/hw/virtio/vhost-user.c
>> +++ b/hw/virtio/vhost-user.c
>> @@ -24,6 +24,10 @@
>>  #include <linux/vhost.h>
>>
>>  #define VHOST_MEMORY_MAX_NREGIONS    8
>> +/* FIXME: same as the max number of numa node?*/
>> +#define HUGEPAGE_MAX_FILES           8
>> +
>> +#define RAM_SHARED     (1 << 1)
>>
>>  typedef enum VhostUserRequest {
>>      VHOST_USER_NONE = 0,
>> @@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
>>      VHOST_USER_SET_VRING_KICK = 12,
>>      VHOST_USER_SET_VRING_CALL = 13,
>>      VHOST_USER_SET_VRING_ERR = 14,
>> -    VHOST_USER_MAX
>> +    VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
>> +    VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
>> +    VHOST_USER_MAX,
>>  } VhostUserRequest;
>>
>>  typedef struct VhostUserMemoryRegion {
>>      uint64_t guest_phys_addr;
>>      uint64_t memory_size;
>>      uint64_t userspace_addr;
>> -    uint64_t mmap_offset;
>>  } VhostUserMemoryRegion;
>>
>>  typedef struct VhostUserMemory {
>> @@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
>>      VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>>  } VhostUserMemory;
>>
>> +typedef struct HugepageMemoryInfo {
>> +    uint64_t base_addr;
>> +    uint64_t size;
>> +} HugeMemInfo;
>> +
>> +typedef struct HugepageInfo {
>> +    uint32_t num;
>> +    HugeMemInfo files[HUGEPAGE_MAX_FILES];
>> +} HugepageInfo;
>> +
>>  typedef struct VhostUserMsg {
>>      VhostUserRequest request;
>>
>> @@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
>>          struct vhost_vring_state state;
>>          struct vhost_vring_addr addr;
>>          VhostUserMemory memory;
>> +        HugepageInfo huge_info;
>>      };
>>  } QEMU_PACKED VhostUserMsg;
>>
>> @@ -104,7 +120,9 @@ static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
>>      VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
>>      VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
>>      VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
>> -    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
>> +    VHOST_SET_VRING_ERR,    /* VHOST_USER_SET_VRING_ERR */
>> +    VHOST_MMAP_HUGEPAGE_FILE,   /* VHOST_USER_MMAP_HUGEPAGE_FILE */
>> +    VHOST_UNMAP_HUGEPAGE_FILE,  /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
>>  };
>>
>>  static VhostUserRequest vhost_user_request_translate(unsigned long int request)
>> @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>>      int fds[VHOST_MEMORY_MAX_NREGIONS];
>>      int i, fd;
>>      size_t fd_num = 0;
>> +    RAMBlock *block;
>>
>>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>>
>> @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>>      case VHOST_RESET_OWNER:
>>          break;
>>
>> -    case VHOST_SET_MEM_TABLE:
>> -        for (i = 0; i < dev->mem->nregions; ++i) {
>> -            struct vhost_memory_region *reg = dev->mem->regions + i;
>> -            ram_addr_t ram_addr;
>> +    case VHOST_MMAP_HUGEPAGE_FILE:
>> +        qemu_mutex_lock_ramlist();
>>
>> -            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
>> -            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
>> -            fd = qemu_get_ram_fd(ram_addr);
>> -            if (fd > 0) {
>> -                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
>> -                msg.memory.regions[fd_num].memory_size = reg->memory_size;
>> -                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
>> -                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
>> -                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
>> -                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
>> -                fds[fd_num++] = fd;
>> +        /* Get hugepage file informations */
>> +        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
>> +            if (block->flags & RAM_SHARED && block->fd > 0) {
>> +                msg.huge_info.files[fd_num].size = block->length;
>> +                msg.huge_info.files[fd_num].base_addr = block->host;
>> +                fds[fd_num++] = block->fd;
>>              }
>>          }
>> +        msg.huge_info.num = fd_num;
>>
>> -        msg.memory.nregions = fd_num;
>> +        /* Calculate msg size */
>> +        msg.size = sizeof(m.huge_info.num);
>> +        msg.size += fd_num * sizeof(HugeMemInfo);
>> +
>> +        qemu_mutex_unlock_ramlist();
>> +        break;
>>
>> -        if (!fd_num) {
>> -            error_report("Failed initializing vhost-user memory map\n"
>> -                         "consider using -object memory-backend-file share=on\n");
>> -            return -1;
>> +    case VHOST_UNMAP_HUGEPAGE_FILE:
>> +        /* Tell vhost-user to unmap all hugepage files. */
>> +        break;
>> +
>> +    case VHOST_SET_MEM_TABLE:
>> +        for (i = 0; i < dev->mem->nregions; i++) {
>> +            struct vhost_memory_region *reg = dev->mem->regions + i;
>> +
>> +            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
>> +
>> +            msg.memory.regions[i].userspace_addr = reg->userspace_addr;
>> +            msg.memory.regions[i].memory_size = reg->memory_size;
>> +            msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
>> +            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
>>          }
>>
>> +        msg.memory.nregions = i;
>>          msg.size = sizeof(m.memory.nregions);
>>          msg.size += sizeof(m.memory.padding);
>> -        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
>> -
>> +        msg.size += i * sizeof(VhostUserMemoryRegion);
>>          break;
>>
>>      case VHOST_SET_LOG_FD:
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index 5a12861..b8eb341 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
>>      if (r < 0) {
>>          goto fail_features;
>>      }
>> +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
>> +        r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
>> +                                        NULL);
>> +        if (r < 0) {
>> +            r = -errno;
>> +            goto fail_mem;
>> +        }
>> +    }
>>      r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
>>      if (r < 0) {
>>          r = -errno;
>> @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
>>      g_free(hdev->log);
>>      hdev->log = NULL;
>>      hdev->log_size = 0;
>> +
>> +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
>> +        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
>
> VHOST_MMAP_HUGEPAGE_FILE -> VHOST_UNMAP_HUGEPAGE_FILE
>
>> +                                          NULL);
>> +    }
>>  }
>>
>> diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
>> index c656f61..bb72811 100644
>> --- a/linux-headers/linux/vhost.h
>> +++ b/linux-headers/linux/vhost.h
>> @@ -113,6 +113,13 @@ struct vhost_memory {
>>  /* Set eventfd to signal an error */
>>  #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
>>
>> +/* Tell vhost-user to mmap hugepage file */
>> +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
>> +/* Tell vhost-user to unmap hugepage file */
>> +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
>> +
>> +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
>> +
>>  /* VHOST_NET specific defines */
>>
>>  /* Attach virtio net ring to a raw socket, or tap device.
>>
>

-- 
Regards,
Haifeng