Re: [kvm-devel] QEMU/KVM: virtio-blk asynchronous IO

2008-03-29 Thread Anthony Liguori
Marcelo Tosatti wrote:
 Use asynchronous IO in the virtio block QEMU driver.

 virtio_blk_handle_output should not block for long periods, since it
 holds the mutex lock, prohibiting other vcpus from doing IO to QEMU
 devices. Without AIO, write-intensive benchmarks make guests hang for
 several seconds. Write performance also increases significantly.

 Also report errors properly.

 To take full advantage of parallel IO we need to allow more than one AIO
 thread per fd, or use direct IO (-nocache), which uses kernel AIO.

 A separate patch allows the virtio-block guest driver to queue more than
 one element in the virtio ring.

 Anthony, this patch abuses the virtqueue_push() interface by passing a
 VirtQueueElement with only the index member valid, since we know that is
 all it uses. Doing so avoids the allocation, zeroing and copying of an
 entire VirtQueueElement structure. What do you say?
   

So I'd like to do this a little differently.  In a few minutes, I'll be 
sending a virtio patch series to QEMU.  In it, I introduce a standard 
way to deal with scatter/gather lists.  Assuming that goes well, I think 
we should add a proper bdrv_aio_readv/writev interface that takes an 
IOVector.  That simplifies the virtio-blk support quite a bit.  We also 
need to implement a reset handler that cancels any pending aio requests 
when we introduce aio support.
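
For illustration, a rough sketch of what a vectored block interface along those
lines could look like is below. The IOVector layout and the exact prototypes are
assumptions made up for this sketch, not the interface from the actual patch series:

/*
 * Hypothetical sketch only: the IOVector layout and these prototypes are
 * assumptions for illustration, not the interface from the patch series.
 */
#include <stddef.h>
#include <stdint.h>

typedef struct BlockDriverState BlockDriverState;     /* from QEMU block.h */
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);

typedef struct IOVectorElement {
    void  *base;              /* start of one scatter/gather segment */
    size_t len;               /* segment length in bytes             */
} IOVectorElement;

typedef struct IOVector {
    int num;                  /* number of segments in sg[]          */
    IOVectorElement sg[];     /* the segments themselves             */
} IOVector;

/*
 * Submit one vectored request; cb(opaque, ret) fires exactly once when the
 * whole scatter/gather list has been transferred (ret < 0 on error).
 */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 const IOVector *iov,
                                 BlockDriverCompletionFunc *cb, void *opaque);
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  const IOVector *iov,
                                  BlockDriverCompletionFunc *cb, void *opaque);

The point of such an interface is that the device hands the whole scatter/gather
list to the block layer in one call and gets a single completion callback back,
instead of issuing one bdrv_aio_read/write per segment and counting completions
itself, as the posted patch has to do.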

Regards,

Anthony Liguori




[kvm-devel] QEMU/KVM: virtio-blk asynchronous IO

2008-03-27 Thread Marcelo Tosatti

Use asynchronous IO in the virtio block QEMU driver.

virtio_blk_handle_output should not block for long periods, since it
holds the mutex lock, prohibiting other vcpus from doing IO to QEMU
devices. Without AIO, write-intensive benchmarks make guests hang for
several seconds. Write performance also increases significantly.

Also report errors properly.

To take full advantage of parallel IO we need to allow more than one AIO
thread per fd, or use direct IO (-nocache), which uses kernel AIO.
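
(For readers unfamiliar with the second option: opening the image with O_DIRECT
bypasses the host page cache, which is what lets the request be driven through
the kernel's native AIO interface. A minimal standalone libaio example of that
combination follows; it is not QEMU code, and the file name, sizes and error
handling are made up for the illustration. Build with -laio.)

/* Standalone illustration of O_DIRECT + Linux kernel AIO (libaio); not QEMU
 * code -- file name, sizes and error handling are made up for the example. */
#define _GNU_SOURCE               /* for O_DIRECT */
#include <fcntl.h>
#include <libaio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    io_context_t ctx = 0;
    struct iocb cb, *cbs[1] = { &cb };
    struct io_event ev;
    void *buf;
    int fd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }

    /* O_DIRECT bypasses the page cache; buffers, lengths and offsets must
     * then be aligned (4096 is a safe choice). */
    fd = open(argv[1], O_RDONLY | O_DIRECT);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (posix_memalign(&buf, 4096, 4096))
        return 1;

    if (io_setup(1, &ctx) != 0) {              /* room for one in-flight request */
        fprintf(stderr, "io_setup failed\n");
        return 1;
    }

    io_prep_pread(&cb, fd, buf, 4096, 0);      /* read 4 KiB at offset 0 */
    if (io_submit(ctx, 1, cbs) != 1) {
        fprintf(stderr, "io_submit failed\n");
        return 1;
    }

    /* A device model would poll completions from its event loop; here we
     * simply block until the single request finishes. */
    if (io_getevents(ctx, 1, 1, &ev, NULL) != 1) {
        fprintf(stderr, "io_getevents failed\n");
        return 1;
    }
    printf("read %ld bytes\n", (long)ev.res);

    io_destroy(ctx);
    close(fd);
    free(buf);
    return 0;
}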

A separate patch allows the virtio-block guest driver to queue more than
one element in the virtio ring.

Anthony, this patch abuses the virtqueue_push() interface by passing a
VirtQueueElement with only the index member valid, since we know that is
all it uses. Doing so avoids the allocation, zeroing and copying of an
entire VirtQueueElement structure. What do you say?

Index: kvm-userspace.io/qemu/hw/virtio-blk.c
===================================================================
--- kvm-userspace.io.orig/qemu/hw/virtio-blk.c
+++ kvm-userspace.io/qemu/hw/virtio-blk.c
@@ -77,53 +77,100 @@ static VirtIOBlock *to_virtio_blk(VirtIO
     return (VirtIOBlock *)vdev;
 }
 
+typedef struct VirtIOBlockReq
+{
+    VirtIODevice *vdev;
+    VirtQueue *vq;
+    struct iovec in_sg_status;
+    unsigned int pending;
+    unsigned int len;
+    unsigned int elem_idx;
+    int status;
+} VirtIOBlockReq;
+
+static void virtio_blk_rw_complete(void *opaque, int ret)
+{
+    VirtIOBlockReq *req = opaque;
+    struct virtio_blk_inhdr *in;
+    VirtQueueElement elem;
+
+    req->status |= ret;
+    if (--req->pending > 0)
+        return;
+
+    elem.index = req->elem_idx;
+    in = (void *)req->in_sg_status.iov_base;
+
+    in->status = req->status ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
+    virtqueue_push(req->vq, &elem, req->len);
+    virtio_notify(req->vdev, req->vq);
+    qemu_free(req);
+}
+
 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBlock *s = to_virtio_blk(vdev);
     VirtQueueElement elem;
+    VirtIOBlockReq *req;
     unsigned int count;
 
     while ((count = virtqueue_pop(vq, &elem)) != 0) {
         struct virtio_blk_inhdr *in;
         struct virtio_blk_outhdr *out;
-        unsigned int wlen;
         off_t off;
         int i;
 
+        /*
+         * FIXME: limit the number of in-flight requests
+         */
+        req = qemu_malloc(sizeof(VirtIOBlockReq));
+        if (!req)
+            return;
+        memset(req, 0, sizeof(*req));
+        memcpy(&req->in_sg_status, &elem.in_sg[elem.in_num - 1],
+               sizeof(req->in_sg_status));
+        req->vdev = vdev;
+        req->vq = vq;
+        req->elem_idx = elem.index;
+
         out = (void *)elem.out_sg[0].iov_base;
         in = (void *)elem.in_sg[elem.in_num - 1].iov_base;
         off = out->sector;
 
         if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
-            wlen = sizeof(*in);
+            unsigned int len = sizeof(*in);
+
             in->status = VIRTIO_BLK_S_UNSUPP;
+            virtqueue_push(vq, &elem, len);
+            virtio_notify(vdev, vq);
+            qemu_free(req);
+
         } else if (out->type & VIRTIO_BLK_T_OUT) {
-            wlen = sizeof(*in);
+            req->pending = elem.out_num - 1;
 
             for (i = 1; i < elem.out_num; i++) {
-                bdrv_write(s->bs, off,
+                bdrv_aio_write(s->bs, off,
                            elem.out_sg[i].iov_base,
-                           elem.out_sg[i].iov_len / 512);
+                           elem.out_sg[i].iov_len / 512,
+                           virtio_blk_rw_complete,
+                           req);
                 off += elem.out_sg[i].iov_len / 512;
+                req->len += elem.out_sg[i].iov_len;
             }
 
-            in->status = VIRTIO_BLK_S_OK;
         } else {
-            wlen = sizeof(*in);
+            req->pending = elem.in_num - 1;
 
             for (i = 0; i < elem.in_num - 1; i++) {
-                bdrv_read(s->bs, off,
+                bdrv_aio_read(s->bs, off,
                           elem.in_sg[i].iov_base,
-                          elem.in_sg[i].iov_len / 512);
+                          elem.in_sg[i].iov_len / 512,
+                          virtio_blk_rw_complete,
+                          req);
                 off += elem.in_sg[i].iov_len / 512;
-                wlen += elem.in_sg[i].iov_len;
+                req->len += elem.in_sg[i].iov_len;
             }
-
-            in->status = VIRTIO_BLK_S_OK;
         }
-
-        virtqueue_push(vq, &elem, wlen);
-        virtio_notify(vdev, vq);
     }
 }
 
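The heart of the patch is a fan-out/fan-in pattern: each virtio request is split
into one AIO submission per data segment, error codes are OR-ed together, and the
guest is notified exactly once, when the last submission completes. A stripped-down,
self-contained sketch of just that pattern follows; submit_aio() and its immediate
callback are placeholders for the example, not QEMU APIs:

/* Standalone sketch of the completion-counting pattern used by the patch.
 * submit_aio() runs its callback immediately so the example is runnable;
 * in QEMU the callback fires later from the AIO layer. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

struct request {
    unsigned int pending;   /* segment submissions still in flight       */
    int status;             /* OR of per-segment return codes (0 == OK)  */
};

static void segment_done(struct request *req, int ret)
{
    req->status |= ret;              /* remember any failure             */
    if (--req->pending > 0)
        return;                      /* other segments still outstanding */
    /* Last segment: complete the whole request exactly once. */
    printf("request done, status %d\n", req->status ? -EIO : 0);
    free(req);
}

/* Stand-in for bdrv_aio_read/write: pretend the I/O succeeded. */
static void submit_aio(void *buf, size_t len,
                       void (*cb)(struct request *, int), struct request *req)
{
    (void)buf; (void)len;
    cb(req, 0);
}

static void handle_request(struct iovec *sg, int num)
{
    struct request *req = calloc(1, sizeof(*req));
    int i;

    req->pending = num;              /* one submission per data segment  */
    for (i = 0; i < num; i++)
        submit_aio(sg[i].iov_base, sg[i].iov_len, segment_done, req);
}

int main(void)
{
    char a[512], b[512];
    struct iovec sg[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

    handle_request(sg, 2);
    return 0;
}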



Re: [kvm-devel] QEMU/KVM: virtio-blk asynchronous IO

2008-03-27 Thread Anthony Liguori
Marcelo Tosatti wrote:
 Use asynchronous IO in the virtio block QEMU driver.

 virtio_blk_handle_output should not block for long periods, since it
 holds the mutex lock, prohibiting other vcpus from doing IO to QEMU
 devices. Without AIO, write-intensive benchmarks make guests hang for
 several seconds. Write performance also increases significantly.

 Also report errors properly.

 To take full advantage of parallel IO we need to allow more than one AIO
 thread per fd, or use direct IO (-nocache), which uses kernel AIO.

 A separate patch allows the virtio-block guest driver to queue more than
 one element in the virtio ring.

 Anthony, this patch abuses the virtqueue_push() interface by passing a
 VirtQueueElement with only the index member valid, since we know that is
 all it uses. Doing so avoids the allocation, zeroing and copying of an
 entire VirtQueueElement structure. What do you say?
   

I've got a virtio patch series that changes the virtqueue_pop() 
interface to return a pointer to a VirtQueueElement, which I believe 
addresses your use-case.
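
A rough idea of what a pointer-returning pop could look like; the prototypes
below are assumptions for illustration, not taken from the actual series:

/* Hypothetical sketch -- prototypes assumed for illustration only. */
typedef struct VirtQueue VirtQueue;
typedef struct VirtQueueElement VirtQueueElement;

/*
 * Pop the next element, or return NULL when the ring is empty.  The element
 * remains valid until it is pushed back, so an asynchronous request can keep
 * the pointer in its own state instead of copying the index and iovecs.
 */
VirtQueueElement *virtqueue_pop(VirtQueue *vq);

/* Complete the element and hand ownership back to the ring. */
void virtqueue_push(VirtQueue *vq, VirtQueueElement *elem, unsigned int len);

That removes the need for the index-only VirtQueueElement trick, since the
completion path can simply pass the same pointer back to virtqueue_push().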

I can just fold your patch into my series (I'll be sending it out this 
afternoon).

Regards,

Anthony Liguori


