During migration devices continue writing to the guest's memory.
These writes have to be reported to QEMU. The change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.

Signed-off-by: Victor Kaplansky <vict...@redhat.com>
---
 tests/vhost-user-bridge.c | 168 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 156 insertions(+), 12 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8bab0bb 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -173,6 +173,9 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS    8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_MQ = 0,
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -265,8 +268,12 @@ typedef struct VubrDev {
     uint32_t nregions;
     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
     VubrVirtq vq[MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    vhost_log_chunk_t *log_table;
     int backend_udp_sock;
     struct sockaddr_in backend_udp_dest;
+    int ready;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -329,6 +336,27 @@ gpa_to_va(VubrDev *dev, uint64_t guest_addr)
     return 0;
 }
 
+/* Translate our virtual address to guest physical address.  */
+static uint64_t
+va_to_gpa(VubrDev *dev, void *obj)
+{
+    int i;
+    uint64_t va = (uint64_t) obj;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *r = &dev->regions[i];
+
+        if ((va >= r->mmap_addr + r->mmap_offset) &&
+            (va < r->mmap_addr + r->mmap_offset + r->size)) {
+            return va - r->mmap_addr - r->mmap_offset + r->gpa;
+        }
+    }
+
+    assert(!"address not found in regions");
+    return 0;
+}
+
 /* Translate qemu virtual address to our virtual address.  */
 static uint64_t
 qva_to_va(VubrDev *dev, uint64_t qemu_addr)
@@ -368,7 +396,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     rc = recvmsg(conn_fd, &msg, 0);
 
-    if (rc <= 0) {
+    if (rc == 0) {
+        vubr_die("recvmsg");
+        fprintf(stderr, "Peer disconnected.\n");
+        exit(1);
+    }
+    if (rc < 0) {
         vubr_die("recvmsg");
     }
 
@@ -395,7 +428,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
     if (vmsg->size) {
         rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc <= 0) {
+        if (rc == 0) {
+            vubr_die("recvmsg");
+            fprintf(stderr, "Peer disconnected.\n");
+            exit(1);
+        }
+        if (rc < 0) {
             vubr_die("recvmsg");
         }
 
@@ -465,6 +503,32 @@ vubr_virtqueue_kick(VubrVirtq *vq)
     }
 }
 
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vubr_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+}
+
 static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
@@ -510,6 +574,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, 
int32_t len)
 
     if (len <= chunk_len) {
         memcpy(chunk_start, buf, len);
+        vubr_log_write(dev, desc[i].addr, len);
     } else {
         fprintf(stderr,
                 "Received too long packet from the backend. Dropping...\n");
@@ -519,11 +584,14 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t 
*buf, int32_t len)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vq->last_avail_index++;
     vq->last_used_index++;
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx));
 
     /* Kick the guest if necessary. */
     vubr_virtqueue_kick(vq);
@@ -552,6 +620,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
         uint32_t chunk_len = desc[i].len;
 
+        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
+
         if (len + chunk_len < buf_size) {
             memcpy(buf + len, chunk_start, chunk_len);
             DPRINT("%d ", chunk_len);
@@ -577,6 +647,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
     /* Add descriptor to the used ring. */
     used->ring[u_index].id = d_index;
     used->ring[u_index].len = len;
+    vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]),
+                   sizeof(used->ring[u_index]));
 
     vubr_consume_raw_packet(dev, buf, len);
 
@@ -596,6 +668,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
     }
 
     atomic_mb_set(&used->idx, vq->last_used_index);
+    vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx));
 }
 
 static void
@@ -609,6 +682,10 @@ vubr_backend_recv_cb(int sock, void *ctx)
     int buflen = sizeof(buf);
     int len;
 
+    if (!dev->ready) {
+        return;
+    }
+
     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
 
     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
@@ -656,9 +733,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
     vmsg->payload.u64 =
             ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-             (1ULL << VIRTIO_NET_F_CTRL_RX) |
-             (1ULL << VHOST_F_LOG_ALL));
+             (1ULL << VHOST_F_LOG_ALL) |
+             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
+
     vmsg->size = sizeof(vmsg->payload.u64);
 
     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -680,10 +757,27 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
     return 0;
 }
 
+static void
+vubr_close_log(VubrDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            vubr_die("munmap()");
+        }
+
+        dev->log_table = 0;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
 static int
 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
+    vubr_close_log(dev);
+    dev->ready = 0;
     return 0;
 }
 
@@ -736,8 +830,39 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    int fd, i;
+    uint64_t last_addr = 0;
+    uint64_t log_size;
+    void *rc;
+
+    assert(vmsg->fd_num == 1);
+    fd = vmsg->fds[0];
+
+    /* Calculate size of vlog_table. Each entry is 32 bits. */
+    for (i = 0; i < dev->nregions; i++) {
+        VubrDevRegion *dev_region = &dev->regions[i];
+
+        uint64_t last_region_addr = dev_region->gpa + dev_region->size;
+
+        if (last_addr < last_region_addr) {
+            last_addr = last_region_addr;
+        }
+    }
+
+    log_size = last_addr / (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t));
+    DPRINT("Largest guest address: 0x%016"PRIx64"\n", last_addr);
+    DPRINT("Log size: %"PRId64"\n", log_size);
+
+    rc = mmap(0, log_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (rc == MAP_FAILED) {
+        vubr_die("mmap");
+    }
+    dev->log_table = (vhost_log_chunk_t *) rc;
+    dev->log_size = log_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -803,8 +928,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_index;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    /* reply */
+    return 1;
 }
 
 static int
@@ -829,6 +960,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
                dev->vq[index].kick_fd, index);
     }
+    if (dev->vq[0].kick_fd != -1 &&
+        dev->vq[1].kick_fd != -1) {
+        dev->ready = 1;
+    }
     return 0;
 }
 
@@ -858,9 +993,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
 static int
 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
 {
-    /* FIXME: unimplented */
+    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    /* Reply */
+    return 1;
 }
 
 static int
@@ -1012,6 +1150,12 @@ vubr_new(const char *path)
         };
     }
 
+    /* Init log */
+    dev->log_call_fd = -1;
+    dev->log_size    = 0;
+    dev->log_table   = 0;
+    dev->ready = 0;
+
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
-- 
--Victor

Reply via email to