From: linhaifeng <haifeng....@huawei.com>

If we create a VM with two or more NUMA nodes, QEMU creates two or
more hugepage files, but it only sends one hugepage file fd to
vhost-user (for example, a VM with 2G of memory split across two
NUMA nodes).

Add two new messages, VHOST_USER_MMAP_HUGEPAGE_FILE and
VHOST_USER_UNMAP_HUGEPAGE_FILE, so that the fd of every shared
hugepage file is passed to the vhost-user backend together with its
size and QEMU's mapping address.

Signed-off-by: linhaifeng <haifeng....@huawei.com>
---
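Note for reviewers (not part of the patch): below is a rough sketch of
what the backend side of the two new messages could look like. It is
only an illustration under assumptions; the struct layouts mirror the
QEMU-side definitions added by this patch (the real VhostUserMsg is
packed), while the handler names, the fd-passing plumbing and the local
bookkeeping are made up here and do not exist in any current vhost-user
backend.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HUGEPAGE_MAX_FILES 8

/* Mirrors the HugepageInfo added by this patch (the wire message is packed). */
typedef struct HugepageMemoryInfo {
    uint64_t base_addr;   /* QEMU's mapping address of the hugepage file */
    uint64_t size;        /* length of that mapping */
} HugeMemInfo;

typedef struct HugepageInfo {
    uint32_t num;
    HugeMemInfo files[HUGEPAGE_MAX_FILES];
} HugepageInfo;

/* Local record of the regions we mapped, one entry per received fd. */
static void *local_addr[HUGEPAGE_MAX_FILES];
static uint64_t qemu_base[HUGEPAGE_MAX_FILES];
static size_t local_size[HUGEPAGE_MAX_FILES];
static uint32_t local_num;

/* VHOST_USER_MMAP_HUGEPAGE_FILE: mmap every hugepage file fd we were sent. */
static int handle_mmap_hugepage_file(const HugepageInfo *info, const int *fds)
{
    uint32_t i;

    for (i = 0; i < info->num && i < HUGEPAGE_MAX_FILES; i++) {
        void *addr = mmap(NULL, info->files[i].size,
                          PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0);
        if (addr == MAP_FAILED) {
            perror("mmap hugepage file");
            return -1;
        }
        local_addr[i] = addr;
        local_size[i] = info->files[i].size;
        qemu_base[i] = info->files[i].base_addr;
        close(fds[i]);          /* the mapping keeps the file alive */
    }
    local_num = i;
    return 0;
}

/* VHOST_USER_UNMAP_HUGEPAGE_FILE: drop all of the mappings again. */
static void handle_unmap_hugepage_file(void)
{
    uint32_t i;

    for (i = 0; i < local_num; i++) {
        munmap(local_addr[i], local_size[i]);
        local_addr[i] = NULL;
        local_size[i] = 0;
    }
    local_num = 0;
}

/*
 * Translate a QEMU userspace address (as sent in SET_MEM_TABLE, which
 * no longer carries fds or an mmap_offset) into a pointer inside our
 * own mappings.
 */
static void *user_to_local(uint64_t userspace_addr)
{
    uint32_t i;

    for (i = 0; i < local_num; i++) {
        if (userspace_addr >= qemu_base[i] &&
            userspace_addr < qemu_base[i] + local_size[i]) {
            return (uint8_t *)local_addr[i] + (userspace_addr - qemu_base[i]);
        }
    }
    return NULL;
}

Since SET_MEM_TABLE no longer passes fds or an mmap_offset in this
scheme, the base_addr/size pairs from the mmap message are what let the
backend perform the address translation sketched above.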
 hw/virtio/vhost-user.c      | 78 ++++++++++++++++++++++++++++++---------------
 hw/virtio/vhost.c           | 13 ++++++++
 linux-headers/linux/vhost.h |  7 ++++
 3 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index aefe0bb..439cbba 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -24,6 +24,10 @@
 #include <linux/vhost.h>
 
 #define VHOST_MEMORY_MAX_NREGIONS    8
+/* FIXME: should this match the maximum number of NUMA nodes? */
+#define HUGEPAGE_MAX_FILES           8
+
+#define RAM_SHARED     (1 << 1)
 
 typedef enum VhostUserRequest {
     VHOST_USER_NONE = 0,
@@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
     VHOST_USER_SET_VRING_KICK = 12,
     VHOST_USER_SET_VRING_CALL = 13,
     VHOST_USER_SET_VRING_ERR = 14,
-    VHOST_USER_MAX
+    VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
+    VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
+    VHOST_USER_MAX,
 } VhostUserRequest;
 
 typedef struct VhostUserMemoryRegion {
     uint64_t guest_phys_addr;
     uint64_t memory_size;
     uint64_t userspace_addr;
-    uint64_t mmap_offset;
 } VhostUserMemoryRegion;
 
 typedef struct VhostUserMemory {
@@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct HugepageMemoryInfo {
+    uint64_t base_addr;
+    uint64_t size;
+} HugeMemInfo;
+
+typedef struct HugepageInfo {
+    uint32_t num;
+    HugeMemInfo files[HUGEPAGE_MAX_FILES];
+} HugepageInfo;
+
 typedef struct VhostUserMsg {
     VhostUserRequest request;
 
@@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
+        HugepageInfo huge_info;
     };
 } QEMU_PACKED VhostUserMsg;
 
@@ -104,7 +120,9 @@ static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
     VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
     VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
     VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
-    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
+    VHOST_SET_VRING_ERR,    /* VHOST_USER_SET_VRING_ERR */
+    VHOST_MMAP_HUGEPAGE_FILE,  /* VHOST_USER_MMAP_HUGEPAGE_FILE */
+    VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
 };
 
 static VhostUserRequest vhost_user_request_translate(unsigned long int request)
@@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int i, fd;
     size_t fd_num = 0;
+    RAMBlock *block;
 
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
 
@@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
     case VHOST_RESET_OWNER:
         break;
 
-    case VHOST_SET_MEM_TABLE:
-        for (i = 0; i < dev->mem->nregions; ++i) {
-            struct vhost_memory_region *reg = dev->mem->regions + i;
-            ram_addr_t ram_addr;
+    case VHOST_MMAP_HUGEPAGE_FILE:
+        qemu_mutex_lock_ramlist();
 
-            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
-            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
-            fd = qemu_get_ram_fd(ram_addr);
-            if (fd > 0) {
-                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
-                msg.memory.regions[fd_num].memory_size  = reg->memory_size;
-                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
-                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
-                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
-                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
-                fds[fd_num++] = fd;
+        /* Gather hugepage file information */
+        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+            if ((block->flags & RAM_SHARED) && block->fd > 0) {
+                msg.huge_info.files[fd_num].size = block->length;
+                msg.huge_info.files[fd_num].base_addr = (uint64_t)(uintptr_t)block->host;
+                fds[fd_num++] = block->fd;
             }
         }
+        msg.huge_info.num = fd_num;
 
-        msg.memory.nregions = fd_num;
+        /* Calculate msg size */
+        msg.size = sizeof(m.huge_info.num);
+        msg.size += fd_num * sizeof(HugeMemInfo);
+
+        qemu_mutex_unlock_ramlist();
+        break;
 
-        if (!fd_num) {
-            error_report("Failed initializing vhost-user memory map\n"
-                    "consider using -object memory-backend-file share=on\n");
-            return -1;
+    case VHOST_UNMAP_HUGEPAGE_FILE:
+        /* Tell vhost-user to unmap all hugepage files. */
+        break;
+
+    case VHOST_SET_MEM_TABLE:
+        for (i = 0; i < dev->mem->nregions; i++) {
+            struct vhost_memory_region *reg = dev->mem->regions + i;
+
+            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
+            assert(i < VHOST_MEMORY_MAX_NREGIONS);
+
+            msg.memory.regions[i].userspace_addr = reg->userspace_addr;
+            msg.memory.regions[i].memory_size  = reg->memory_size;
+            msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
         }
 
+        msg.memory.nregions = i;
         msg.size = sizeof(m.memory.nregions);
         msg.size += sizeof(m.memory.padding);
-        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
-
+        msg.size += i * sizeof(VhostUserMemoryRegion);
         break;
 
     case VHOST_SET_LOG_FD:
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 5a12861..b8eb341 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
     if (r < 0) {
         goto fail_features;
     }
+    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+        r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
+                                        NULL);
+        if (r < 0) {
+            r = -errno;
+            goto fail_mem;
+        }
+    }
     r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
     if (r < 0) {
         r = -errno;
@@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
     g_free(hdev->log);
     hdev->log = NULL;
     hdev->log_size = 0;
+
+    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_UNMAP_HUGEPAGE_FILE,
+                                          NULL);
+    }
 }
 
diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index c656f61..bb72811 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -113,6 +113,13 @@ struct vhost_memory {
 /* Set eventfd to signal an error */
 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
 
+/* Tell vhost-user to mmap hugepage file */
+#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
+/* Tell vhost-user to unmap hugepage file */
+#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
+
+#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.
-- 
1.9.0


