On Mon, Feb 02, 2015 at 12:29:47AM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/12/2014 07:02, haifeng....@huawei.com wrote:
> > From: linhaifeng <haifeng....@huawei.com>
> > 
> > If we create a VM with two or more NUMA nodes, QEMU creates two
> > or more hugepage files, but it sends only one hugepage file fd
> > to vhost-user when the VM's memory size is 2G and it has two
> > NUMA nodes.
> > 
> > Signed-off-by: linhaifeng <haifeng....@huawei.com>
> 
> The bug is in vhost_dev_assign_memory.  It doesn't check that the file
> descriptor matches when merging regions.  Michael, does the merging
> trigger in practice?  Can we just eliminate it?
> 
> Paolo

I'm not sure: does the memory core ever give us two adjacent RAM
segments that we *can* merge?  If yes, it would trigger, and extra
memory slots slow down lookups linearly, so they aren't free.
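
To make the hazard concrete, here is a sketch (illustrative only, not
the actual vhost_dev_assign_memory() code) of address-based merging
that never consults the backing fd; the struct and names are mine:

#include <stdint.h>
#include <stdbool.h>

/* Illustrative region.  The real struct vhost_memory_region carries
 * no fd at all, which is exactly the problem: the fd lives elsewhere
 * and is not part of the merge decision. */
struct region {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    int fd;                         /* backing file, ignored below */
};

static bool try_merge(struct region *prev, const struct region *next)
{
    /* Adjacent in both guest-physical and userspace address space... */
    if (prev->guest_phys_addr + prev->memory_size == next->guest_phys_addr &&
        prev->userspace_addr + prev->memory_size == next->userspace_addr) {
        /* ...so the regions coalesce even when prev->fd != next->fd,
         * e.g. two 1G NUMA nodes backed by two hugepage files that
         * happen to be mapped contiguously.  Only one fd then gets
         * sent to the backend for the merged range. */
        prev->memory_size += next->memory_size;
        return true;
    }
    return false;
}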
> > ---
> >  hw/virtio/vhost-user.c      | 78 ++++++++++++++++++++++++++++++---------------
> >  hw/virtio/vhost.c           | 13 ++++++++
> >  linux-headers/linux/vhost.h |  7 ++++
> >  3 files changed, 73 insertions(+), 25 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index aefe0bb..439cbba 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -24,6 +24,10 @@
> >  #include <linux/vhost.h>
> >  
> >  #define VHOST_MEMORY_MAX_NREGIONS    8
> > +/* FIXME: same as the max number of NUMA nodes? */
> > +#define HUGEPAGE_MAX_FILES           8
> > +
> > +#define RAM_SHARED     (1 << 1)
> >  
> >  typedef enum VhostUserRequest {
> >      VHOST_USER_NONE = 0,
> > @@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
> >      VHOST_USER_SET_VRING_KICK = 12,
> >      VHOST_USER_SET_VRING_CALL = 13,
> >      VHOST_USER_SET_VRING_ERR = 14,
> > -    VHOST_USER_MAX
> > +    VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
> > +    VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
> > +    VHOST_USER_MAX,
> >  } VhostUserRequest;
> >  
> >  typedef struct VhostUserMemoryRegion {
> >      uint64_t guest_phys_addr;
> >      uint64_t memory_size;
> >      uint64_t userspace_addr;
> > -    uint64_t mmap_offset;
> >  } VhostUserMemoryRegion;
> >  
> >  typedef struct VhostUserMemory {
> > @@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
> >      VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >  } VhostUserMemory;
> >  
> > +typedef struct HugepageMemoryInfo {
> > +    uint64_t base_addr;
> > +    uint64_t size;
> > +} HugeMemInfo;
> > +
> > +typedef struct HugepageInfo {
> > +    uint32_t num;
> > +    HugeMemInfo files[HUGEPAGE_MAX_FILES];
> > +} HugepageInfo;
> > +
> >  typedef struct VhostUserMsg {
> >      VhostUserRequest request;
> >  
> > @@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
> >          struct vhost_vring_state state;
> >          struct vhost_vring_addr addr;
> >          VhostUserMemory memory;
> > +        HugepageInfo huge_info;
> >      };
> >  } QEMU_PACKED VhostUserMsg;
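
As a sanity check on the wire format: the backend end of this
presumably looks roughly like the sketch below, assuming the fds ride
as SCM_RIGHTS ancillary data on the same recvmsg() as the message
body, the way SET_MEM_TABLE already works.  The function and variable
names are mine, not part of the patch:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Receive a VHOST_USER_MMAP_HUGEPAGE_FILE message plus its fds.
 * Returns the number of fds received, or -1 on error. */
static int recv_hugepage_fds(int sock, VhostUserMsg *msg,
                             int *fds, int max_fds)
{
    char control[CMSG_SPACE(HUGEPAGE_MAX_FILES * sizeof(int))];
    struct iovec iov = { .iov_base = msg, .iov_len = sizeof(*msg) };
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = control, .msg_controllen = sizeof(control),
    };
    struct cmsghdr *cmsg;
    int nfds = 0;

    if (recvmsg(sock, &mh, 0) <= 0) {
        return -1;
    }
    for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_RIGHTS) {
            nfds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
            if (nfds > max_fds) {
                return -1;      /* more files than we can accept */
            }
            memcpy(fds, CMSG_DATA(cmsg), nfds * sizeof(int));
            break;
        }
    }
    return nfds;
}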
> > @@ -104,7 +120,9 @@ static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> >      VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
> >      VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
> >      VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
> > -    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_SET_VRING_ERR,    /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_MMAP_HUGEPAGE_FILE,   /* VHOST_USER_MMAP_HUGEPAGE_FILE */
> > +    VHOST_UNMAP_HUGEPAGE_FILE,  /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
> >  };
> >  
> >  static VhostUserRequest vhost_user_request_translate(unsigned long int request)
> > @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> >      int fds[VHOST_MEMORY_MAX_NREGIONS];
> >      int i, fd;
> >      size_t fd_num = 0;
> > +    RAMBlock *block;
> >  
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> >  
> > @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> >      case VHOST_RESET_OWNER:
> >          break;
> >  
> > -    case VHOST_SET_MEM_TABLE:
> > -        for (i = 0; i < dev->mem->nregions; ++i) {
> > -            struct vhost_memory_region *reg = dev->mem->regions + i;
> > -            ram_addr_t ram_addr;
> > +    case VHOST_MMAP_HUGEPAGE_FILE:
> > +        qemu_mutex_lock_ramlist();
> >  
> > -            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > -            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
> > -            fd = qemu_get_ram_fd(ram_addr);
> > -            if (fd > 0) {
> > -                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
> > -                msg.memory.regions[fd_num].memory_size = reg->memory_size;
> > -                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
> > -                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
> > -                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
> > -                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> > -                fds[fd_num++] = fd;
> > +        /* Get hugepage file information */
> > +        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > +            if (block->flags & RAM_SHARED && block->fd > 0) {
> > +                msg.huge_info.files[fd_num].size = block->length;
> > +                msg.huge_info.files[fd_num].base_addr =
> > +                    (uint64_t)(uintptr_t)block->host;
> > +                fds[fd_num++] = block->fd;
> >              }
> >          }
> > +        msg.huge_info.num = fd_num;
> >  
> > -        msg.memory.nregions = fd_num;
> > +        /* Calculate msg size */
> > +        msg.size = sizeof(m.huge_info.num);
> > +        msg.size += fd_num * sizeof(HugeMemInfo);
> > +
> > +        qemu_mutex_unlock_ramlist();
> > +        break;
> >  
> > -        if (!fd_num) {
> > -            error_report("Failed initializing vhost-user memory map\n"
> > -                         "consider using -object memory-backend-file share=on\n");
> > -            return -1;
> > +    case VHOST_UNMAP_HUGEPAGE_FILE:
> > +        /* Tell vhost-user to unmap all hugepage files. */
> > +        break;
> > +
> > +    case VHOST_SET_MEM_TABLE:
> > +        for (i = 0; i < dev->mem->nregions; i++) {
> > +            struct vhost_memory_region *reg = dev->mem->regions + i;
> > +
> > +            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > +
> > +            msg.memory.regions[i].userspace_addr = reg->userspace_addr;
> > +            msg.memory.regions[i].memory_size = reg->memory_size;
> > +            msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
> > +            assert(i < VHOST_MEMORY_MAX_NREGIONS);
> >          }
> >  
> > +        msg.memory.nregions = i;
> >          msg.size = sizeof(m.memory.nregions);
> >          msg.size += sizeof(m.memory.padding);
> > -        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
> > -
> > +        msg.size += i * sizeof(VhostUserMemoryRegion);
> >          break;
> >  
> >      case VHOST_SET_LOG_FD:
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 5a12861..b8eb341 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
> >      if (r < 0) {
> >          goto fail_features;
> >      }
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
> > +                                        NULL);
> > +        if (r < 0) {
> > +            r = -errno;
> > +            goto fail_mem;
> > +        }
> > +    }
> >      r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> >      if (r < 0) {
> >          r = -errno;
> > @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
> >      g_free(hdev->log);
> >      hdev->log = NULL;
> >      hdev->log_size = 0;
> > +
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_UNMAP_HUGEPAGE_FILE,
> > +                                          NULL);
> > +    }
> >  }
> >  
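
For the ordering in vhost_dev_start() to be useful, the backend has
to mmap each announced file and remember where QEMU mapped it, so
that userspace_addr values from the later SET_MEM_TABLE can be
translated.  A sketch of that bookkeeping, under the same assumptions
as above (struct and names are mine):

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

struct mapped_file {
    uint64_t qemu_base;   /* HugepageInfo base_addr: QEMU's mapping */
    uint64_t size;
    void    *local;       /* our own view of the same hugepage file */
};

static int map_hugepage_files(const HugepageInfo *info, const int *fds,
                              struct mapped_file *out)
{
    for (uint32_t i = 0; i < info->num; i++) {
        out[i].qemu_base = info->files[i].base_addr;
        out[i].size = info->files[i].size;
        out[i].local = mmap(NULL, info->files[i].size,
                            PROT_READ | PROT_WRITE, MAP_SHARED,
                            fds[i], 0);
        close(fds[i]);            /* the mapping keeps the file alive */
        if (out[i].local == MAP_FAILED) {
            return -1;
        }
    }
    return 0;
}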
> > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
> > index c656f61..bb72811 100644
> > --- a/linux-headers/linux/vhost.h
> > +++ b/linux-headers/linux/vhost.h
> > @@ -113,6 +113,13 @@ struct vhost_memory {
> >  /* Set eventfd to signal an error */
> >  #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
> >  
> > +/* Tell vhost-user to mmap hugepage file */
> > +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
> > +/* Tell vhost-user to unmap hugepage file */
> > +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
> > +
> > +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
> > +
> >  /* VHOST_NET specific defines */
> >  
> >  /* Attach virtio net ring to a raw socket, or tap device.
> > 
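
And the translation step that the base_addr/size pairs exist for:
given the mapped_file table from the previous sketch, any
userspace_addr arriving in a later SET_MEM_TABLE message can be
rebased into the backend's own address space.  Again my sketch, not
the patch's code; a linear scan is fine at HUGEPAGE_MAX_FILES == 8:

static void *rebase(const struct mapped_file *files, uint32_t n,
                    uint64_t qemu_addr)
{
    for (uint32_t i = 0; i < n; i++) {
        /* unsigned wraparound makes this a single range check */
        if (qemu_addr - files[i].qemu_base < files[i].size) {
            return (char *)files[i].local +
                   (qemu_addr - files[i].qemu_base);
        }
    }
    return NULL;   /* not backed by any announced hugepage file */
}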