[Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
During migration devices continue writing to the guest's memory. The writes has to be reported to QEMU. This change implements minimal support in vhost-user-bridge required for successful migration of a guest with virtio-net device. Signed-off-by: Victor Kaplansky--- v2: - use log_guest_addr for used ring reported by qemu instead of translating. - use mmap_size and mmap_offset defined in new VHOST_USER_SET_LOG_BASE interface. See the patch "vhost-user: modify SET_LOG_BASE to pass mmap size and offset". - start logging dirty pages only after the appropriate feature is set by a VHOST_USER_GET_PROTOCOL_FEATURES request. - updated TODO list. tests/vhost-user-bridge.c | 169 ++ 1 file changed, 155 insertions(+), 14 deletions(-) diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index fa18ad5..8c1c997 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -13,16 +13,22 @@ /* * TODO: * - main should get parameters from the command line. - * - implement all request handlers. + * - implement all request handlers. Still not implemented: + * vubr_set_protocol_features_exec() + * vubr_get_queue_num_exec() + * vubr_set_vring_enable_exec() + * vubr_send_rarp_exec() + * vubr_set_log_fd_exec() * - test for broken requests and virtqueue. * - implement features defined by Virtio 1.0 spec. * - support mergeable buffers and indirect descriptors. - * - implement RESET_DEVICE request. * - implement clean shutdown. * - implement non-blocking writes to UDP backend. * - implement polling strategy. */ +#define _FILE_OFFSET_BITS 64 + #include #include #include @@ -166,6 +172,7 @@ typedef struct VubrVirtq { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; +uint64_t log_guest_addr; } VubrVirtq; /* Based on qemu/hw/virtio/vhost-user.c */ @@ -173,6 +180,9 @@ typedef struct VubrVirtq { #define VHOST_MEMORY_MAX_NREGIONS8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +typedef uint8_t vhost_log_chunk_t; +#define VHOST_LOG_PAGE 4096 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -220,6 +230,11 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserLog { +uint64_t mmap_size; +uint64_t mmap_offset; +} VhostUserLog; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -234,6 +249,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; +VhostUserLog log; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; int fd_num; @@ -265,8 +281,13 @@ typedef struct VubrDev { uint32_t nregions; VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; VubrVirtq vq[MAX_NR_VIRTQUEUE]; +int log_call_fd; +uint64_t log_size; +vhost_log_chunk_t *log_table; int backend_udp_sock; struct sockaddr_in backend_udp_dest; +int ready; +uint64_t features; } VubrDev; static const char *vubr_request_str[] = { @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) rc = recvmsg(conn_fd, , 0); -if (rc <= 0) { +if (rc == 0) { +vubr_die("recvmsg"); +fprintf(stderr, "Peer disconnected.\n"); +exit(1); +} +if (rc < 0) { vubr_die("recvmsg"); } @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) if (vmsg->size) { rc = read(conn_fd, >payload, vmsg->size); -if (rc <= 0) { +if (rc == 0) { +vubr_die("recvmsg"); +fprintf(stderr, "Peer disconnected.\n"); +exit(1); +} +if (rc < 0) { vubr_die("recvmsg"); } @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq) } } + +static void +vubr_log_page(uint8_t *log_table, uint64_t page) +{ +DPRINT("Logged dirty guest page: %"PRId64"\n", page); +log_table[page / 8] |= 1 << (page % 8); +} + +static void +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) +{ +uint64_t page; + +if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) { +return; +} + +assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8)); + +page = address / VHOST_LOG_PAGE; +while (page * VHOST_LOG_PAGE < address + length) { +vubr_log_page(dev->log_table, page); +page += VHOST_LOG_PAGE; +} +} + static void vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) { struct vring_desc *desc = vq->desc; struct vring_avail *avail = vq->avail; struct vring_used *used = vq->used; +uint64_t log_guest_addr =
Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote: > During migration devices continue writing to the guest's memory. > The writes has to be reported to QEMU. This change implements > minimal support in vhost-user-bridge required for successful > migration of a guest with virtio-net device. > > Signed-off-by: Victor Kaplansky> --- > v2: >- use log_guest_addr for used ring reported by qemu instead of > translating. >- use mmap_size and mmap_offset defined in new > VHOST_USER_SET_LOG_BASE interface. See the patch > "vhost-user: modify SET_LOG_BASE to pass mmap size and > offset". >- start logging dirty pages only after the appropriate feature > is set by a VHOST_USER_GET_PROTOCOL_FEATURES request. >- updated TODO list. > > tests/vhost-user-bridge.c | 169 > ++ > 1 file changed, 155 insertions(+), 14 deletions(-) > > diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c > index fa18ad5..8c1c997 100644 > --- a/tests/vhost-user-bridge.c > +++ b/tests/vhost-user-bridge.c > @@ -13,16 +13,22 @@ > /* > * TODO: > * - main should get parameters from the command line. > - * - implement all request handlers. > + * - implement all request handlers. Still not implemented: > + * vubr_set_protocol_features_exec() > + * vubr_get_queue_num_exec() > + * vubr_set_vring_enable_exec() > + * vubr_send_rarp_exec() > + * vubr_set_log_fd_exec() > * - test for broken requests and virtqueue. > * - implement features defined by Virtio 1.0 spec. > * - support mergeable buffers and indirect descriptors. > - * - implement RESET_DEVICE request. > * - implement clean shutdown. > * - implement non-blocking writes to UDP backend. > * - implement polling strategy. > */ > > +#define _FILE_OFFSET_BITS 64 > + > #include > #include > #include > @@ -166,6 +172,7 @@ typedef struct VubrVirtq { > struct vring_desc *desc; > struct vring_avail *avail; > struct vring_used *used; > +uint64_t log_guest_addr; > } VubrVirtq; > > /* Based on qemu/hw/virtio/vhost-user.c */ > @@ -173,6 +180,9 @@ typedef struct VubrVirtq { > #define VHOST_MEMORY_MAX_NREGIONS8 > #define VHOST_USER_F_PROTOCOL_FEATURES 30 > > +typedef uint8_t vhost_log_chunk_t; Most code just uses uint8_t directly - I think you should just drop this typedef. > +#define VHOST_LOG_PAGE 4096 > + > enum VhostUserProtocolFeature { > VHOST_USER_PROTOCOL_F_MQ = 0, > VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, > @@ -220,6 +230,11 @@ typedef struct VhostUserMemory { > VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > } VhostUserMemory; > > +typedef struct VhostUserLog { > +uint64_t mmap_size; > +uint64_t mmap_offset; > +} VhostUserLog; > + > typedef struct VhostUserMsg { > VhostUserRequest request; > > @@ -234,6 +249,7 @@ typedef struct VhostUserMsg { > struct vhost_vring_state state; > struct vhost_vring_addr addr; > VhostUserMemory memory; > +VhostUserLog log; > } payload; > int fds[VHOST_MEMORY_MAX_NREGIONS]; > int fd_num; > @@ -265,8 +281,13 @@ typedef struct VubrDev { > uint32_t nregions; > VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > VubrVirtq vq[MAX_NR_VIRTQUEUE]; > +int log_call_fd; This doesn't seem to be used. Pls add code to signal this after logging. > +uint64_t log_size; > +vhost_log_chunk_t *log_table; > int backend_udp_sock; > struct sockaddr_in backend_udp_dest; > +int ready; > +uint64_t features; > } VubrDev; > > static const char *vubr_request_str[] = { > @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) > > rc = recvmsg(conn_fd, , 0); > > -if (rc <= 0) { > +if (rc == 0) { > +vubr_die("recvmsg"); > +fprintf(stderr, "Peer disconnected.\n"); > +exit(1); > +} > +if (rc < 0) { > vubr_die("recvmsg"); > } > > @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) > > if (vmsg->size) { > rc = read(conn_fd, >payload, vmsg->size); > -if (rc <= 0) { > +if (rc == 0) { > +vubr_die("recvmsg"); > +fprintf(stderr, "Peer disconnected.\n"); > +exit(1); > +} > +if (rc < 0) { > vubr_die("recvmsg"); > } > > @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq) > } > } > > + > +static void > +vubr_log_page(uint8_t *log_table, uint64_t page) > +{ > +DPRINT("Logged dirty guest page: %"PRId64"\n", page); > +log_table[page / 8] |= 1 << (page % 8); > +} > + This is only safe if there's a single writer. Please add a comment that says as much. Or set this atomically, that's also not hard to do. > +static void > +vubr_log_write(VubrDev *dev, uint64_t address,
Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages
On Thu, Nov 12, 2015 at 04:38:51PM +0200, Michael S. Tsirkin wrote: > On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote: > > /* Based on qemu/hw/virtio/vhost-user.c */ > > @@ -173,6 +180,9 @@ typedef struct VubrVirtq { > > #define VHOST_MEMORY_MAX_NREGIONS8 > > #define VHOST_USER_F_PROTOCOL_FEATURES 30 > > > > +typedef uint8_t vhost_log_chunk_t; > > Most code just uses uint8_t directly - I think you should > just drop this typedef. Oh, right. I'll clean this. > > @@ -234,6 +249,7 @@ typedef struct VhostUserMsg { > > struct vhost_vring_state state; > > struct vhost_vring_addr addr; > > VhostUserMemory memory; > > +VhostUserLog log; > > } payload; > > int fds[VHOST_MEMORY_MAX_NREGIONS]; > > int fd_num; > > @@ -265,8 +281,13 @@ typedef struct VubrDev { > > uint32_t nregions; > > VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > > VubrVirtq vq[MAX_NR_VIRTQUEUE]; > > +int log_call_fd; > > This doesn't seem to be used. Pls add code to signal this > after logging. > Right, implementation of SET_LOG_FD request handler was on my TODO list. I'll include it in the next version of the patch (as well as using it for the signaling). > > @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq) > > } > > } > > > > + > > +static void > > +vubr_log_page(uint8_t *log_table, uint64_t page) > > +{ > > +DPRINT("Logged dirty guest page: %"PRId64"\n", page); > > +log_table[page / 8] |= 1 << (page % 8); > > +} > > + > > This is only safe if there's a single writer. > Please add a comment that says as much. > Or set this atomically, that's also not hard to do. > I'll set it atomically. > > +static void > > +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) > > +{ > > +uint64_t page; > > + > > +if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) { > > +return; > > +} > > + > > +assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8)); > > I think there's an off by 1 here. > Imagine size == 0, test should always fail. > But imagine address == 0 and length == 1. > You get >= and test passes, seems wrong. > Oh, good catch. Will fix it. Hopefully, right way this time. ;-) > > +rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, > > + log_mmap_offset); > > +if (rc == MAP_FAILED) { > > +vubr_die("mmap"); > > +} > > +dev->log_table = (vhost_log_chunk_t *) rc; > > Cast is not needed here. > Indeed. > > @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg > > *vmsg) > > DPRINT("Waiting for kicks on fd: %d for vq: %d\n", > > dev->vq[index].kick_fd, index); > > } > > +if (dev->vq[0].kick_fd != -1 && > > +dev->vq[1].kick_fd != -1) { > > +dev->ready = 1; > > I'm not sure what this does. Related to logging somehow? > Anyway, processing a VQ should happen after either > - you received a kick > or > - you received VRING_ENABLE It is a temporarily hack to determine if vrings are ready for processing. AFAIR, DPDK code uses the same heuristics. I'll add an explanation in the comments. --Victor