[Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages

2015-11-12 Thread Victor Kaplansky
During migration devices continue writing to the guest's memory.
The writes has to be reported to QEMU. This change implements
minimal support in vhost-user-bridge required for successful
migration of a guest with virtio-net device.

Signed-off-by: Victor Kaplansky 
---
v2:
   - use log_guest_addr for used ring reported by qemu instead of
 translating.
   - use mmap_size and mmap_offset defined in new
 VHOST_USER_SET_LOG_BASE interface. See the patch
 "vhost-user: modify SET_LOG_BASE to pass mmap size and
 offset".
   - start logging dirty pages only after the appropriate feature
 is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
   - updated TODO list.

 tests/vhost-user-bridge.c | 169 ++
 1 file changed, 155 insertions(+), 14 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index fa18ad5..8c1c997 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -13,16 +13,22 @@
 /*
  * TODO:
  * - main should get parameters from the command line.
- * - implement all request handlers.
+ * - implement all request handlers. Still not implemented:
+ *  vubr_set_protocol_features_exec()
+ *  vubr_get_queue_num_exec()
+ *  vubr_set_vring_enable_exec()
+ *  vubr_send_rarp_exec()
+ *  vubr_set_log_fd_exec()
  * - test for broken requests and virtqueue.
  * - implement features defined by Virtio 1.0 spec.
  * - support mergeable buffers and indirect descriptors.
- * - implement RESET_DEVICE request.
  * - implement clean shutdown.
  * - implement non-blocking writes to UDP backend.
  * - implement polling strategy.
  */
 
+#define _FILE_OFFSET_BITS 64
+
 #include 
 #include 
 #include 
@@ -166,6 +172,7 @@ typedef struct VubrVirtq {
 struct vring_desc *desc;
 struct vring_avail *avail;
 struct vring_used *used;
+uint64_t log_guest_addr;
 } VubrVirtq;
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -173,6 +180,9 @@ typedef struct VubrVirtq {
 #define VHOST_MEMORY_MAX_NREGIONS8
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+typedef uint8_t vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 4096
+
 enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_MQ = 0,
 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 
+typedef struct VhostUserLog {
+uint64_t mmap_size;
+uint64_t mmap_offset;
+} VhostUserLog;
+
 typedef struct VhostUserMsg {
 VhostUserRequest request;
 
@@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
 struct vhost_vring_state state;
 struct vhost_vring_addr addr;
 VhostUserMemory memory;
+VhostUserLog log;
 } payload;
 int fds[VHOST_MEMORY_MAX_NREGIONS];
 int fd_num;
@@ -265,8 +281,13 @@ typedef struct VubrDev {
 uint32_t nregions;
 VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 VubrVirtq vq[MAX_NR_VIRTQUEUE];
+int log_call_fd;
+uint64_t log_size;
+vhost_log_chunk_t *log_table;
 int backend_udp_sock;
 struct sockaddr_in backend_udp_dest;
+int ready;
+uint64_t features;
 } VubrDev;
 
 static const char *vubr_request_str[] = {
@@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
 rc = recvmsg(conn_fd, , 0);
 
-if (rc <= 0) {
+if (rc == 0) {
+vubr_die("recvmsg");
+fprintf(stderr, "Peer disconnected.\n");
+exit(1);
+}
+if (rc < 0) {
 vubr_die("recvmsg");
 }
 
@@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
 
 if (vmsg->size) {
 rc = read(conn_fd, >payload, vmsg->size);
-if (rc <= 0) {
+if (rc == 0) {
+vubr_die("recvmsg");
+fprintf(stderr, "Peer disconnected.\n");
+exit(1);
+}
+if (rc < 0) {
 vubr_die("recvmsg");
 }
 
@@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
 }
 }
 
+
+static void
+vubr_log_page(uint8_t *log_table, uint64_t page)
+{
+DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+log_table[page / 8] |= 1 << (page % 8);
+}
+
+static void
+vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
+{
+uint64_t page;
+
+if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
+return;
+}
+
+assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
+
+page = address / VHOST_LOG_PAGE;
+while (page * VHOST_LOG_PAGE < address + length) {
+vubr_log_page(dev->log_table, page);
+page += VHOST_LOG_PAGE;
+}
+}
+
 static void
 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
 {
 struct vring_desc *desc   = vq->desc;
 struct vring_avail *avail = vq->avail;
 struct vring_used *used   = vq->used;
+uint64_t log_guest_addr = 

Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages

2015-11-12 Thread Michael S. Tsirkin
On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> During migration devices continue writing to the guest's memory.
> The writes has to be reported to QEMU. This change implements
> minimal support in vhost-user-bridge required for successful
> migration of a guest with virtio-net device.
> 
> Signed-off-by: Victor Kaplansky 
> ---
> v2:
>- use log_guest_addr for used ring reported by qemu instead of
>  translating.
>- use mmap_size and mmap_offset defined in new
>  VHOST_USER_SET_LOG_BASE interface. See the patch
>  "vhost-user: modify SET_LOG_BASE to pass mmap size and
>  offset".
>- start logging dirty pages only after the appropriate feature
>  is set by a VHOST_USER_GET_PROTOCOL_FEATURES request.
>- updated TODO list.
> 
>  tests/vhost-user-bridge.c | 169 
> ++
>  1 file changed, 155 insertions(+), 14 deletions(-)
> 
> diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
> index fa18ad5..8c1c997 100644
> --- a/tests/vhost-user-bridge.c
> +++ b/tests/vhost-user-bridge.c
> @@ -13,16 +13,22 @@
>  /*
>   * TODO:
>   * - main should get parameters from the command line.
> - * - implement all request handlers.
> + * - implement all request handlers. Still not implemented:
> + *  vubr_set_protocol_features_exec()
> + *  vubr_get_queue_num_exec()
> + *  vubr_set_vring_enable_exec()
> + *  vubr_send_rarp_exec()
> + *  vubr_set_log_fd_exec()
>   * - test for broken requests and virtqueue.
>   * - implement features defined by Virtio 1.0 spec.
>   * - support mergeable buffers and indirect descriptors.
> - * - implement RESET_DEVICE request.
>   * - implement clean shutdown.
>   * - implement non-blocking writes to UDP backend.
>   * - implement polling strategy.
>   */
>  
> +#define _FILE_OFFSET_BITS 64
> +
>  #include 
>  #include 
>  #include 
> @@ -166,6 +172,7 @@ typedef struct VubrVirtq {
>  struct vring_desc *desc;
>  struct vring_avail *avail;
>  struct vring_used *used;
> +uint64_t log_guest_addr;
>  } VubrVirtq;
>  
>  /* Based on qemu/hw/virtio/vhost-user.c */
> @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
>  #define VHOST_MEMORY_MAX_NREGIONS8
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
>  
> +typedef uint8_t vhost_log_chunk_t;

Most code just uses uint8_t directly - I think you should
just drop this typedef.

> +#define VHOST_LOG_PAGE 4096
> +
>  enum VhostUserProtocolFeature {
>  VHOST_USER_PROTOCOL_F_MQ = 0,
>  VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
> @@ -220,6 +230,11 @@ typedef struct VhostUserMemory {
>  VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>  } VhostUserMemory;
>  
> +typedef struct VhostUserLog {
> +uint64_t mmap_size;
> +uint64_t mmap_offset;
> +} VhostUserLog;
> +
>  typedef struct VhostUserMsg {
>  VhostUserRequest request;
>  
> @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
>  struct vhost_vring_state state;
>  struct vhost_vring_addr addr;
>  VhostUserMemory memory;
> +VhostUserLog log;
>  } payload;
>  int fds[VHOST_MEMORY_MAX_NREGIONS];
>  int fd_num;
> @@ -265,8 +281,13 @@ typedef struct VubrDev {
>  uint32_t nregions;
>  VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>  VubrVirtq vq[MAX_NR_VIRTQUEUE];
> +int log_call_fd;

This doesn't seem to be used. Pls add code to signal this
after logging.

> +uint64_t log_size;
> +vhost_log_chunk_t *log_table;
>  int backend_udp_sock;
>  struct sockaddr_in backend_udp_dest;
> +int ready;
> +uint64_t features;
>  } VubrDev;
>  
>  static const char *vubr_request_str[] = {
> @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>  
>  rc = recvmsg(conn_fd, , 0);
>  
> -if (rc <= 0) {
> +if (rc == 0) {
> +vubr_die("recvmsg");
> +fprintf(stderr, "Peer disconnected.\n");
> +exit(1);
> +}
> +if (rc < 0) {
>  vubr_die("recvmsg");
>  }
>  
> @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
>  
>  if (vmsg->size) {
>  rc = read(conn_fd, >payload, vmsg->size);
> -if (rc <= 0) {
> +if (rc == 0) {
> +vubr_die("recvmsg");
> +fprintf(stderr, "Peer disconnected.\n");
> +exit(1);
> +}
> +if (rc < 0) {
>  vubr_die("recvmsg");
>  }
>  
> @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
>  }
>  }
>  
> +
> +static void
> +vubr_log_page(uint8_t *log_table, uint64_t page)
> +{
> +DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> +log_table[page / 8] |= 1 << (page % 8);
> +}
> +

This is only safe if there's a single writer.
Please add a comment that says as much.
Or set this atomically, that's also not hard to do.

> +static void
> +vubr_log_write(VubrDev *dev, uint64_t address, 

Re: [Qemu-devel] [PATCH v2] tests/vhost-user-bridge: implement logging of dirty pages

2015-11-12 Thread Victor Kaplansky
On Thu, Nov 12, 2015 at 04:38:51PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 12, 2015 at 02:34:56PM +0200, Victor Kaplansky wrote:
> >  /* Based on qemu/hw/virtio/vhost-user.c */
> > @@ -173,6 +180,9 @@ typedef struct VubrVirtq {
> >  #define VHOST_MEMORY_MAX_NREGIONS8
> >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> >  
> > +typedef uint8_t vhost_log_chunk_t;
> 
> Most code just uses uint8_t directly - I think you should
> just drop this typedef.

Oh, right. I'll clean this.

> > @@ -234,6 +249,7 @@ typedef struct VhostUserMsg {
> >  struct vhost_vring_state state;
> >  struct vhost_vring_addr addr;
> >  VhostUserMemory memory;
> > +VhostUserLog log;
> >  } payload;
> >  int fds[VHOST_MEMORY_MAX_NREGIONS];
> >  int fd_num;
> > @@ -265,8 +281,13 @@ typedef struct VubrDev {
> >  uint32_t nregions;
> >  VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >  VubrVirtq vq[MAX_NR_VIRTQUEUE];
> > +int log_call_fd;
> 
> This doesn't seem to be used. Pls add code to signal this
> after logging.
> 

Right, implementation of SET_LOG_FD request handler was on my TODO
list. I'll include it in the next version of the patch (as well
as using it for the signaling).

> > @@ -465,12 +496,39 @@ vubr_virtqueue_kick(VubrVirtq *vq)
> >  }
> >  }
> >  
> > +
> > +static void
> > +vubr_log_page(uint8_t *log_table, uint64_t page)
> > +{
> > +DPRINT("Logged dirty guest page: %"PRId64"\n", page);
> > +log_table[page / 8] |= 1 << (page % 8);
> > +}
> > +
> 
> This is only safe if there's a single writer.
> Please add a comment that says as much.
> Or set this atomically, that's also not hard to do.
> 

I'll set it atomically.

> > +static void
> > +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
> > +{
> > +uint64_t page;
> > +
> > +if (!(dev->features & VHOST_F_LOG_ALL) || !dev->log_table || !length) {
> > +return;
> > +}
> > +
> > +assert(dev->log_size >= ((address + length) / VHOST_LOG_PAGE / 8));
> 
> I think there's an off by 1 here.
> Imagine size == 0, test should always fail.
> But imagine address == 0 and length == 1.
> You get >= and test passes, seems wrong.
> 

Oh, good catch. Will fix it. Hopefully, right way this time. ;-)

> > +rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> > +  log_mmap_offset);
> > +if (rc == MAP_FAILED) {
> > +vubr_die("mmap");
> > +}
> > +dev->log_table = (vhost_log_chunk_t *) rc;
> 
> Cast is not needed here.
> 

Indeed.

> > @@ -829,6 +956,10 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg 
> > *vmsg)
> >  DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
> > dev->vq[index].kick_fd, index);
> >  }
> > +if (dev->vq[0].kick_fd != -1 &&
> > +dev->vq[1].kick_fd != -1) {
> > +dev->ready = 1;
> 
> I'm not sure what this does. Related to logging somehow?
> Anyway, processing a VQ should happen after either
> - you received a kick
> or
> - you received VRING_ENABLE

It is a temporarily hack to determine if vrings are ready for
processing. AFAIR, DPDK code uses the same heuristics. I'll add an
explanation in the comments.

--Victor