On Mon, Feb 02, 2015 at 12:29:47AM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/12/2014 07:02, haifeng....@huawei.com wrote:
> > From: linhaifeng <haifeng....@huawei.com>
> > 
> > If we create VM with two or more numa nodes qemu will create two
> > or more hugepage files but qemu only send one hugepage file fd
> > to vhost-user when VM's memory size is 2G and with two numa nodes.
> > 
> > Signed-off-by: linhaifeng <haifeng....@huawei.com>
> 
> The bug is in vhost_dev_assign_memory.  It doesn't check that the file
> descriptor matches when merging regions.  Michael, does the merging
> trigger in practice?  Can we just eliminate it?
> 
> Paolo

I'm not sure: does memory core ever give us two adjacent
RAM segments that we *can* merge?
If yes it would trigger, and extra memory slots slow down lookups
linearly so they aren't free.

> > ---
> >  hw/virtio/vhost-user.c      | 78 
> > ++++++++++++++++++++++++++++++---------------
> >  hw/virtio/vhost.c           | 13 ++++++++
> >  linux-headers/linux/vhost.h |  7 ++++
> >  3 files changed, 73 insertions(+), 25 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index aefe0bb..439cbba 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -24,6 +24,10 @@
> >  #include <linux/vhost.h>
> >  
> >  #define VHOST_MEMORY_MAX_NREGIONS    8
> > +/* FIXME: same as the max number of numa node?*/
> > +#define HUGEPAGE_MAX_FILES           8
> > +
> > +#define RAM_SHARED     (1 << 1)
> >  
> >  typedef enum VhostUserRequest {
> >      VHOST_USER_NONE = 0,
> > @@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
> >      VHOST_USER_SET_VRING_KICK = 12,
> >      VHOST_USER_SET_VRING_CALL = 13,
> >      VHOST_USER_SET_VRING_ERR = 14,
> > -    VHOST_USER_MAX
> > +    VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
> > +    VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
> > +    VHOST_USER_MAX,
> >  } VhostUserRequest;
> >  
> >  typedef struct VhostUserMemoryRegion {
> >      uint64_t guest_phys_addr;
> >      uint64_t memory_size;
> >      uint64_t userspace_addr;
> > -    uint64_t mmap_offset;
> >  } VhostUserMemoryRegion;
> >  
> >  typedef struct VhostUserMemory {
> > @@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
> >      VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >  } VhostUserMemory;
> >  
> > +typedef struct HugepageMemoryInfo {
> > +    uint64_t base_addr;
> > +    uint64_t size;
> > +}HugeMemInfo;
> > +
> > +typedef struct HugepageInfo {
> > +    uint32_t num;
> > +    HugeMemInfo files[HUGEPAGE_MAX_FILES];
> > +}HugepageInfo;
> > +
> >  typedef struct VhostUserMsg {
> >      VhostUserRequest request;
> >  
> > @@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
> >          struct vhost_vring_state state;
> >          struct vhost_vring_addr addr;
> >          VhostUserMemory memory;
> > +        HugepageInfo huge_info;
> >      };
> >  } QEMU_PACKED VhostUserMsg;
> >  
> > @@ -104,7 +120,9 @@ static unsigned long int 
> > ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> >      VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
> >      VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
> >      VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
> > -    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_SET_VRING_ERR,    /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_MMAP_HUGEPAGE_FILE,  /* VHOST_USER_MMAP_HUGEPAGE_FILE */
> > +    VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
> >  };
> >  
> >  static VhostUserRequest vhost_user_request_translate(unsigned long int 
> > request)
> > @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, 
> > unsigned long int request,
> >      int fds[VHOST_MEMORY_MAX_NREGIONS];
> >      int i, fd;
> >      size_t fd_num = 0;
> > +    RAMBlock *block;
> >  
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> >  
> > @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, 
> > unsigned long int request,
> >      case VHOST_RESET_OWNER:
> >          break;
> >  
> > -    case VHOST_SET_MEM_TABLE:
> > -        for (i = 0; i < dev->mem->nregions; ++i) {
> > -            struct vhost_memory_region *reg = dev->mem->regions + i;
> > -            ram_addr_t ram_addr;
> > +    case VHOST_MMAP_HUGEPAGE_FILE:
> > +        qemu_mutex_lock_ramlist();
> >  
> > -            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > -            qemu_ram_addr_from_host((void 
> > *)(uintptr_t)reg->userspace_addr, &ram_addr);
> > -            fd = qemu_get_ram_fd(ram_addr);
> > -            if (fd > 0) {
> > -                msg.memory.regions[fd_num].userspace_addr = 
> > reg->userspace_addr;
> > -                msg.memory.regions[fd_num].memory_size  = reg->memory_size;
> > -                msg.memory.regions[fd_num].guest_phys_addr = 
> > reg->guest_phys_addr;
> > -                msg.memory.regions[fd_num].mmap_offset = 
> > reg->userspace_addr -
> > -                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
> > -                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> > -                fds[fd_num++] = fd;
> > +        /* Get hugepage file informations */
> > +        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > +            if (block->flags & RAM_SHARED && block->fd > 0) {
> > +                msg.huge_info.files[fd_num].size = block->length;
> > +                msg.huge_info.files[fd_num].base_addr = block->host;
> > +                fds[fd_num++] = block->fd;
> >              }
> >          }
> > +        msg.huge_info.num = fd_num;
> >  
> > -        msg.memory.nregions = fd_num;
> > +        /* Calculate msg size */
> > +        msg.size = sizeof(m.huge_info.num);
> > +        msg.size += fd_num * sizeof(HugeMemInfo);
> > +        
> > +        qemu_mutex_unlock_ramlist();        
> > +        break;
> >  
> > -        if (!fd_num) {
> > -            error_report("Failed initializing vhost-user memory map\n"
> > -                    "consider using -object memory-backend-file 
> > share=on\n");
> > -            return -1;
> > +    case VHOST_UNMAP_HUGEPAGE_FILE:
> > +        /* Tell vhost-user to unmap all hugepage files. */
> > +        break;
> > +
> > +    case VHOST_SET_MEM_TABLE:
> > +        for (i = 0; i < dev->mem->nregions; i++) {
> > +            struct vhost_memory_region *reg = dev->mem->regions + i;
> > +
> > +            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > +
> > +            msg.memory.regions[i].userspace_addr = reg->userspace_addr;
> > +            msg.memory.regions[i].memory_size  = reg->memory_size;
> > +            msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
> > +            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> >          }
> >  
> > +        msg.memory.nregions = i;
> >          msg.size = sizeof(m.memory.nregions);
> >          msg.size += sizeof(m.memory.padding);
> > -        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
> > -
> > +        msg.size += i * sizeof(VhostUserMemoryRegion);
> >          break;
> >  
> >      case VHOST_SET_LOG_FD:
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 5a12861..b8eb341 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, 
> > VirtIODevice *vdev)
> >      if (r < 0) {
> >          goto fail_features;
> >      }
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, 
> > +            NULL);
> > +        if (r < 0) {
> > +            r = -errno;
> > +            goto fail_mem;
> > +        }    
> > +    }
> >      r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> >      if (r < 0) {
> >          r = -errno;
> > @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, 
> > VirtIODevice *vdev)
> >      g_free(hdev->log);
> >      hdev->log = NULL;
> >      hdev->log_size = 0;
> > +
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, 
> > +            NULL);   
> > +    }    
> >  }
> >  
> > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
> > index c656f61..bb72811 100644
> > --- a/linux-headers/linux/vhost.h
> > +++ b/linux-headers/linux/vhost.h
> > @@ -113,6 +113,13 @@ struct vhost_memory {
> >  /* Set eventfd to signal an error */
> >  #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct 
> > vhost_vring_file)
> >  
> > +/* Tell vhost-user to mmap hugepage file */
> > +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
> > +/* Tell vhost-user to unmap hugepage file */
> > +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
> > +
> > +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
> > +
> >  /* VHOST_NET specific defines */
> >  
> >  /* Attach virtio net ring to a raw socket, or tap device.
> > 

Reply via email to