During migration devices continue writing to the guest's memory. These writes have to be reported to QEMU. The change implements minimal support in vhost-user-bridge required for successful migration of a guest with virtio-net device.
Signed-off-by: Victor Kaplansky <vict...@redhat.com> --- tests/vhost-user-bridge.c | 168 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 156 insertions(+), 12 deletions(-) diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index fa18ad5..8bab0bb 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -173,6 +173,9 @@ typedef struct VubrVirtq { #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +typedef uint8_t vhost_log_chunk_t; +#define VHOST_LOG_PAGE 4096 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -265,8 +268,12 @@ typedef struct VubrDev { uint32_t nregions; VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; VubrVirtq vq[MAX_NR_VIRTQUEUE]; + int log_call_fd; + uint64_t log_size; + vhost_log_chunk_t *log_table; int backend_udp_sock; struct sockaddr_in backend_udp_dest; + int ready; } VubrDev; static const char *vubr_request_str[] = { @@ -329,6 +336,27 @@ gpa_to_va(VubrDev *dev, uint64_t guest_addr) return 0; } +/* Translate our virtual address to guest physical address. */ +static uint64_t +va_to_gpa(VubrDev *dev, void *obj) +{ + int i; + uint64_t va = (uint64_t) obj; + + /* Find matching memory region. */ + for (i = 0; i < dev->nregions; i++) { + VubrDevRegion *r = &dev->regions[i]; + + if ((va >= r->mmap_addr + r->mmap_offset) && + (va < r->mmap_addr + r->mmap_offset + r->size)) { + return va - r->mmap_addr - r->mmap_offset + r->gpa; + } + } + + assert(!"address not found in regions"); + return 0; +} + /* Translate qemu virtual address to our virtual address. */ static uint64_t qva_to_va(VubrDev *dev, uint64_t qemu_addr) @@ -368,7 +396,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) rc = recvmsg(conn_fd, &msg, 0); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -395,7 +428,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) if (vmsg->size) { rc = read(conn_fd, &vmsg->payload, vmsg->size); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -465,6 +503,32 @@ vubr_virtqueue_kick(VubrVirtq *vq) } } + +static void +vubr_log_page(uint8_t *log_table, uint64_t page) +{ + DPRINT("Logged dirty guest page: %"PRId64"\n", page); + log_table[page / 8] |= 1 << (page % 8); +} + +static void +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) +{ + uint64_t page; + + if (!dev->log_table || !length) { + return; + } + + assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8)); + + page = address / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < address + length) { + vubr_log_page(dev->log_table, page); + page += VHOST_LOG_PAGE; + } +} + static void vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) { @@ -510,6 +574,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) if (len <= chunk_len) { memcpy(chunk_start, buf, len); + vubr_log_write(dev, desc[i].addr, len); } else { fprintf(stderr, "Received too long packet from the backend. Dropping...\n"); @@ -519,11 +584,14 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) /* Add descriptor to the used ring. */ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]), + sizeof(used->ring[u_index])); vq->last_avail_index++; vq->last_used_index++; atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx)); /* Kick the guest if necessary. */ vubr_virtqueue_kick(vq); @@ -552,6 +620,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); uint32_t chunk_len = desc[i].len; + assert(!(desc[i].flags & VRING_DESC_F_WRITE)); + if (len + chunk_len < buf_size) { memcpy(buf + len, chunk_start, chunk_len); DPRINT("%d ", chunk_len); @@ -577,6 +647,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) /* Add descriptor to the used ring. */ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, va_to_gpa(dev, &used->ring[u_index]), + sizeof(used->ring[u_index])); vubr_consume_raw_packet(dev, buf, len); @@ -596,6 +668,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq) } atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, va_to_gpa(dev, &used->idx), sizeof(used->idx)); } static void @@ -609,6 +682,10 @@ vubr_backend_recv_cb(int sock, void *ctx) int buflen = sizeof(buf); int len; + if (!dev->ready) { + return; + } + DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); @@ -656,9 +733,9 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { vmsg->payload.u64 = ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_CTRL_VQ) | - (1ULL << VIRTIO_NET_F_CTRL_RX) | - (1ULL << VHOST_F_LOG_ALL)); + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); + vmsg->size = sizeof(vmsg->payload.u64); DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); @@ -680,10 +757,27 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) return 0; } +static void +vubr_close_log(VubrDev *dev) +{ + if (dev->log_table) { + if (munmap(dev->log_table, dev->log_size) != 0) { + vubr_die("munmap()"); + } + + dev->log_table = 0; + } + if (dev->log_call_fd != -1) { + close(dev->log_call_fd); + dev->log_call_fd = -1; + } +} + static int vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + vubr_close_log(dev); + dev->ready = 0; return 0; } @@ -736,8 +830,39 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + int fd, i; + uint64_t last_addr = 0; + uint64_t log_size; + void *rc; + + assert(vmsg->fd_num == 1); + fd = vmsg->fds[0]; + + /* Calculate size of vlog_table. Each entry is 32 bits. */ + for (i = 0; i < dev->nregions; i++) { + VubrDevRegion *dev_region = &dev->regions[i]; + + uint64_t last_region_addr = dev_region->gpa + dev_region->size; + + if (last_addr < last_region_addr) { + last_addr = last_region_addr; + } + } + + log_size = last_addr / (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t)); + DPRINT("Largest guest address: 0x%016"PRIx64"\n", last_addr); + DPRINT("Log size: %"PRId64"\n", log_size); + + rc = mmap(0, log_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (rc == MAP_FAILED) { + vubr_die("mmap"); + } + dev->log_table = (vhost_log_chunk_t *) rc; + dev->log_size = log_size; + + vmsg->size = sizeof(vmsg->payload.u64); + /* Reply */ + return 1; } static int @@ -803,8 +928,14 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + unsigned int index = vmsg->payload.state.index; + + DPRINT("State.index: %d\n", index); + vmsg->payload.state.num = dev->vq[index].last_avail_index; + vmsg->size = sizeof(vmsg->payload.state); + + /* reply */ + return 1; } static int @@ -829,6 +960,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) DPRINT("Waiting for kicks on fd: %d for vq: %d\n", dev->vq[index].kick_fd, index); } + if (dev->vq[0].kick_fd != -1 && + dev->vq[1].kick_fd != -1) { + dev->ready = 1; + } return 0; } @@ -858,9 +993,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { - /* FIXME: unimplented */ + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - return 0; + vmsg->size = sizeof(vmsg->payload.u64); + + /* Reply */ + return 1; } static int @@ -1012,6 +1150,12 @@ vubr_new(const char *path) }; } + /* Init log */ + dev->log_call_fd = -1; + dev->log_size = 0; + dev->log_table = 0; + dev->ready = 0; + /* Get a UNIX socket. */ dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); if (dev->sock == -1) { -- --Victor