On 2015/1/29 18:51, Michael S. Tsirkin wrote: > On Thu, Jan 29, 2015 at 11:58:08AM +0800, Linhaifeng wrote: >> Hi,Michael S.Tsirkin >> >> The vhost-user device will not work if there are two numa nodes in VM. >> >> Should we fix this bug or ignore it ? > > I suggest we fix this bug. > I saw that you responded to self so I assume you will > send v2 with the modification you listed. > Did I get it right? > > Also, pls Cc qemu-stable on bugfixes. > > Thanks! >
Hi, Michael S. Tsirkin. Yes, in v2 I want to write vhost-user-test.c to show how to mmap and how to calculate the address, so that you and others can review it. But I don't know how to compile and run the test. Is there any document that shows how? BTW, I know you wrote the virtio_net driver and vhost. Is there any document that introduces the principles of how virtio and vhost work? >> On 2014/12/18 13:06, Linhaifeng wrote: >>> >>> >>> On 2014/12/17 14:02, haifeng....@huawei.com wrote: >>>> From: linhaifeng <haifeng....@huawei.com> >>>> >>>> If we create VM with two or more numa nodes qemu will create two >>>> or more hugepage files but qemu only send one hugepage file fd >>>> to vhost-user when VM's memory size is 2G and with two numa nodes. >>>> >>>> Signed-off-by: linhaifeng <haifeng....@huawei.com> >>>> --- >>>> hw/virtio/vhost-user.c | 78 >>>> ++++++++++++++++++++++++++++++--------------- >>>> hw/virtio/vhost.c | 13 ++++++++ >>>> linux-headers/linux/vhost.h | 7 ++++ >>>> 3 files changed, 73 insertions(+), 25 deletions(-) >>>> >>>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c >>>> index aefe0bb..439cbba 100644 >>>> --- a/hw/virtio/vhost-user.c >>>> +++ b/hw/virtio/vhost-user.c >>>> @@ -24,6 +24,10 @@ >>>> #include <linux/vhost.h> >>>> >>>> #define VHOST_MEMORY_MAX_NREGIONS 8 >>>> +/* FIXME: same as the max number of numa node?*/ >>>> +#define HUGEPAGE_MAX_FILES 8 >>>> + >>>> +#define RAM_SHARED (1 << 1) >>>> >>>> typedef enum VhostUserRequest { >>>> VHOST_USER_NONE = 0, >>>> @@ -41,14 +45,15 @@ typedef enum VhostUserRequest { >>>> VHOST_USER_SET_VRING_KICK = 12, >>>> VHOST_USER_SET_VRING_CALL = 13, >>>> VHOST_USER_SET_VRING_ERR = 14, >>>> - VHOST_USER_MAX >>>> + VHOST_USER_MMAP_HUGEPAGE_FILE = 15, >>>> + VHOST_USER_UNMAP_HUGEPAGE_FILE = 16, >>>> + VHOST_USER_MAX, >>>> } VhostUserRequest; >>>> >>>> typedef struct VhostUserMemoryRegion { >>>> uint64_t guest_phys_addr; >>>> uint64_t memory_size; >>>> uint64_t userspace_addr; >>>> - uint64_t mmap_offset; >>>> } 
VhostUserMemoryRegion; >>>> >>>> typedef struct VhostUserMemory { >>>> @@ -57,6 +62,16 @@ typedef struct VhostUserMemory { >>>> VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; >>>> } VhostUserMemory; >>>> >>>> +typedef struct HugepageMemoryInfo { >>>> + uint64_t base_addr; >>>> + uint64_t size; >>>> +}HugeMemInfo; >>>> + >>>> +typedef struct HugepageInfo { >>>> + uint32_t num; >>>> + HugeMemInfo files[HUGEPAGE_MAX_FILES]; >>>> +}HugepageInfo; >>>> + >>>> typedef struct VhostUserMsg { >>>> VhostUserRequest request; >>>> >>>> @@ -71,6 +86,7 @@ typedef struct VhostUserMsg { >>>> struct vhost_vring_state state; >>>> struct vhost_vring_addr addr; >>>> VhostUserMemory memory; >>>> + HugepageInfo huge_info; >>>> }; >>>> } QEMU_PACKED VhostUserMsg; >>>> >>>> @@ -104,7 +120,9 @@ static unsigned long int >>>> ioctl_to_vhost_user_request[VHOST_USER_MAX] = { >>>> VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ >>>> VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ >>>> VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ >>>> - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ >>>> + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */ >>>> + VHOST_MMAP_HUGEPAGE_FILE, /* VHOST_USER_MMAP_HUGEPAGE_FILE */ >>>> + VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */ >>>> }; >>>> >>>> static VhostUserRequest vhost_user_request_translate(unsigned long int >>>> request) >>>> @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, >>>> unsigned long int request, >>>> int fds[VHOST_MEMORY_MAX_NREGIONS]; >>>> int i, fd; >>>> size_t fd_num = 0; >>>> + RAMBlock *block; >>>> >>>> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); >>>> >>>> @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, >>>> unsigned long int request, >>>> case VHOST_RESET_OWNER: >>>> break; >>>> >>>> - case VHOST_SET_MEM_TABLE: >>>> - for (i = 0; i < dev->mem->nregions; ++i) { >>>> - struct vhost_memory_region *reg = dev->mem->regions + i; 
>>>> - ram_addr_t ram_addr; >>>> + case VHOST_MMAP_HUGEPAGE_FILE: >>>> + qemu_mutex_lock_ramlist(); >>>> >>>> - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); >>>> - qemu_ram_addr_from_host((void >>>> *)(uintptr_t)reg->userspace_addr, &ram_addr); >>>> - fd = qemu_get_ram_fd(ram_addr); >>>> - if (fd > 0) { >>>> - msg.memory.regions[fd_num].userspace_addr = >>>> reg->userspace_addr; >>>> - msg.memory.regions[fd_num].memory_size = >>>> reg->memory_size; >>>> - msg.memory.regions[fd_num].guest_phys_addr = >>>> reg->guest_phys_addr; >>>> - msg.memory.regions[fd_num].mmap_offset = >>>> reg->userspace_addr - >>>> - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); >>>> - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); >>>> - fds[fd_num++] = fd; >>>> + /* Get hugepage file informations */ >>>> + QTAILQ_FOREACH(block, &ram_list.blocks, next) { >>>> + if (block->flags & RAM_SHARED && block->fd > 0) { >>>> + msg.huge_info.files[fd_num].size = block->length; >>>> + msg.huge_info.files[fd_num].base_addr = block->host; >>>> + fds[fd_num++] = block->fd; >>>> } >>>> } >>>> + msg.huge_info.num = fd_num; >>>> >>>> - msg.memory.nregions = fd_num; >>>> + /* Calculate msg size */ >>>> + msg.size = sizeof(m.huge_info.num); >>>> + msg.size += fd_num * sizeof(HugeMemInfo); >>>> + >>>> + qemu_mutex_unlock_ramlist(); >>>> + break; >>>> >>>> - if (!fd_num) { >>>> - error_report("Failed initializing vhost-user memory map\n" >>>> - "consider using -object memory-backend-file >>>> share=on\n"); >>>> - return -1; >>>> + case VHOST_UNMAP_HUGEPAGE_FILE: >>>> + /* Tell vhost-user to unmap all hugepage files. 
*/ >>>> + break; >>>> + >>>> + case VHOST_SET_MEM_TABLE: >>>> + for (i = 0; i < dev->mem->nregions; i++) { >>>> + struct vhost_memory_region *reg = dev->mem->regions + i; >>>> + >>>> + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); >>>> + >>>> + msg.memory.regions[i].userspace_addr = reg->userspace_addr; >>>> + msg.memory.regions[i].memory_size = reg->memory_size; >>>> + msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr; >>>> + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); >>>> } >>>> >>>> + msg.memory.nregions = i; >>>> msg.size = sizeof(m.memory.nregions); >>>> msg.size += sizeof(m.memory.padding); >>>> - msg.size += fd_num * sizeof(VhostUserMemoryRegion); >>>> - >>>> + msg.size += i * sizeof(VhostUserMemoryRegion); >>>> break; >>>> >>>> case VHOST_SET_LOG_FD: >>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >>>> index 5a12861..b8eb341 100644 >>>> --- a/hw/virtio/vhost.c >>>> +++ b/hw/virtio/vhost.c >>>> @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, >>>> VirtIODevice *vdev) >>>> if (r < 0) { >>>> goto fail_features; >>>> } >>>> + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { >>>> + r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, >>>> + NULL); >>>> + if (r < 0) { >>>> + r = -errno; >>>> + goto fail_mem; >>>> + } >>>> + } >>>> r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); >>>> if (r < 0) { >>>> r = -errno; >>>> @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, >>>> VirtIODevice *vdev) >>>> g_free(hdev->log); >>>> hdev->log = NULL; >>>> hdev->log_size = 0; >>>> + >>>> + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { >>>> + (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, >>> >>> VHOST_MMAP_HUGEPAGE_FILE -> VHOST_UNMAP_HUGEPAGE_FILE >>> >>>> + NULL); >>>> + } >>>> } >>>> >>>> diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h >>>> index c656f61..bb72811 100644 >>>> --- 
a/linux-headers/linux/vhost.h >>>> +++ b/linux-headers/linux/vhost.h >>>> @@ -113,6 +113,13 @@ struct vhost_memory { >>>> /* Set eventfd to signal an error */ >>>> #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct >>>> vhost_vring_file) >>>> >>>> +/* Tell vhost-user to mmap hugepage file */ >>>> +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int) >>>> +/* Tell vhost-user to unmap hugepage file */ >>>> +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int) >>>> + >>>> +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct >>>> vhost_vring_thread) >>>> + >>>> /* VHOST_NET specific defines */ >>>> >>>> /* Attach virtio net ring to a raw socket, or tap device. >>>> >>> >> >> -- >> Regards, >> Haifeng > > . > -- Regards, Haifeng