Re: [PATCH 5/5] virtio-blk: implement ->make_request

2011-10-06 Thread Christoph Hellwig
On Thu, Oct 06, 2011 at 12:22:14PM +1030, Rusty Russell wrote:
> On Wed, 05 Oct 2011 15:54:08 -0400, Christoph Hellwig h...@infradead.org wrote:
> > Add an alternate I/O path that implements ->make_request for virtio-blk.
> > This is required for high IOPS devices which get slowed down to 1/5th of
> > the native speed by all the locking, memory allocation and other overhead
> > in the request-based I/O path.
> 
> Ouch.
> 
> I'd be tempted to just switch across to this, though I'd be interested
> to see if the simple add_buf change I referred to before has some effect
> by itself (I doubt it).

Benchmarking this more extensively even on low-end devices is number one
on my todo list after sorting out the virtqueue race and implementing
flush/fua support.  I'd really prefer to switch over to it
unconditionally if the performance numbers allow it.

> Also, though it's overkill, I'd use standard list primitives rather than
> open-coding a singly linked list.

I really prefer using standard helpers, but using a doubly linked list
and increasing memory usage seems like such a waste.  Maybe I should
annoy Linus by proposing another iteration of a common singly linked
list implementation :)
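
To make the memory argument concrete: a struct list_head embeds two
pointers in every virtblk_req, while the open-coded chain needs only one.
A minimal userspace sketch of the difference (illustrative names, not
code from the patch):

	#include <stdio.h>

	/* Doubly linked, as a <linux/list.h> list_head embeds it. */
	struct node_dbl {
		struct node_dbl *next, *prev;	/* 16 bytes on 64-bit */
	};

	/* Singly linked, as the patch open-codes it in virtblk_req. */
	struct node_sgl {
		struct node_sgl *next;		/* 8 bytes on 64-bit */
	};

	int main(void)
	{
		printf("list_head links: %zu bytes per node\n",
		       sizeof(struct node_dbl));
		printf("open-coded next: %zu bytes per node\n",
		       sizeof(struct node_sgl));
		return 0;
	}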



Re: [PATCH 5/5] virtio-blk: implement ->make_request

2011-10-06 Thread Jens Axboe
On Wed, Oct 05 2011, Christoph Hellwig wrote:
> Add an alternate I/O path that implements ->make_request for virtio-blk.
> This is required for high IOPS devices which get slowed down to 1/5th of
> the native speed by all the locking, memory allocation and other overhead
> in the request-based I/O path.

We definitely have some performance fruit hanging rather low in that
path, but a factor-of-5 performance difference sounds insanely excessive.
I haven't looked at virtio_blk in detail, but I've done 500K+ IOPS on
request-based drivers before (yes, on real hardware). So it could be
that virtio_blk is just doing things rather suboptimally in some places,
and that it would be possible to reclaim most of that speedup there too.

-- 
Jens Axboe



[PATCH 5/5] virtio-blk: implement ->make_request

2011-10-05 Thread Christoph Hellwig
Add an alternate I/O path that implements ->make_request for virtio-blk.
This is required for high IOPS devices which get slowed down to 1/5th of
the native speed by all the locking, memory allocation and other overhead
in the request-based I/O path.

This patch is not quite merge-ready due to two issues:

 - it doesn't implement FUA and FLUSH requests yet
 - it hardcodes which I/O path to choose

Signed-off-by: Christoph Hellwig h...@lst.de
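
Note: the diff below is truncated before the bio submission path itself.
For orientation, a bio-based driver of this era would be wired up roughly
as sketched here; this is an illustrative sketch assuming the post-3.1
void make_request_fn signature, not the patch's actual code, and the body
of virtblk_make_request is elided:

	/* Bio-based entry point: build a virtblk_req for the bio, add it
	 * to vblk->vq, and kick the host.  Sketch only. */
	static void virtblk_make_request(struct request_queue *q,
			struct bio *bio)
	{
		struct virtio_blk *vblk = q->queuedata;

		/* ... map bio segments, virtqueue_add_buf, virtqueue_kick ... */
	}

	/* In probe, picking one of the two I/O paths: */
	if (use_make_request) {
		q = blk_alloc_queue(GFP_KERNEL);
		blk_queue_make_request(q, virtblk_make_request);
	} else {
		q = blk_init_queue(do_virtblk_request, &vblk->lock);
	}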

Index: linux-2.6/drivers/block/virtio_blk.c
===================================================================
--- linux-2.6.orig/drivers/block/virtio_blk.c	2011-10-05 10:36:42.883913334 -0400
+++ linux-2.6/drivers/block/virtio_blk.c	2011-10-05 15:29:35.591405323 -0400
@@ -11,6 +11,8 @@
 
 #define PART_BITS 4
 
+static int use_make_request = 1;
+
 static int major, index;
 struct workqueue_struct *virtblk_wq;
 
@@ -20,6 +22,7 @@ struct virtio_blk
 
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
+	wait_queue_head_t queue_wait;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
@@ -39,11 +42,13 @@ struct virtio_blk
 struct virtblk_req
 {
 	void *private;
+	struct virtblk_req *next;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
 	u8 kind;
 #define VIRTIO_BLK_REQUEST	0x00
-#define VIRTIO_BLK_INTERNAL	0x01
+#define VIRTIO_BLK_BIO		0x01
+#define VIRTIO_BLK_INTERNAL	0x02
 	u8 status;
 };
 
@@ -74,10 +79,17 @@ static void virtblk_request_done(struct
 	mempool_free(vbr, vblk->pool);
 }
 
+static void virtblk_bio_done(struct virtio_blk *vblk,
+		struct virtblk_req *vbr)
+{
+	bio_endio(vbr->private, virtblk_result(vbr));
+	mempool_free(vbr, vblk->pool);
+}
+
 static void blk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
-	struct virtblk_req *vbr;
+	struct virtblk_req *vbr, *head = NULL, *tail = NULL;
 	unsigned int len;
 	unsigned long flags;
 
@@ -88,15 +100,47 @@ static void blk_done(struct virtqueue *v
 			virtblk_request_done(vblk, vbr);
 			break;
 		case VIRTIO_BLK_INTERNAL:
-			complete(vbr->private);
+		case VIRTIO_BLK_BIO:
+			if (head) {
+				tail->next = vbr;
+				tail = vbr;
+			} else {
+				tail = head = vbr;
+			}
 			break;
 		default:
 			BUG();
 		}
 	}
-	/* In case queue is stopped waiting for more buffers. */
-	blk_start_queue(vblk->disk->queue);
+
+	if (!use_make_request) {
+		/* In case queue is stopped waiting for more buffers. */
+		blk_start_queue(vblk->disk->queue);
+	}
 	spin_unlock_irqrestore(&vblk->lock, flags);
+
+	wake_up(&vblk->queue_wait);
+
+	/*
+	 * Process completions after freeing up space in the virtqueue and
+	 * dropping the lock.
+	 */
+	while (head) {
+		vbr = head;
+		head = head->next;
+
+		switch (vbr->kind) {
+		case VIRTIO_BLK_BIO:
+			virtblk_bio_done(vblk, vbr);
+			break;
+		case VIRTIO_BLK_INTERNAL:
+			complete(vbr->private);
+			break;
+		default:
+			BUG();
+		}
+
+	}
 }
 
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -111,6 +155,7 @@ static bool do_req(struct request_queue
 		return false;
 
 	vbr->private = req;
+	vbr->next = NULL;
 	vbr->kind = VIRTIO_BLK_REQUEST;
 
 	if (req->cmd_flags & REQ_FLUSH) {
@@ -199,6 +244,128 @@ static void do_virtblk_request(struct re
 	virtqueue_kick(vblk->vq);
 }
 
+struct virtblk_plug_cb {
+	struct blk_plug_cb cb;
+	struct virtio_blk *vblk;
+};
+
+static void virtblk_unplug(struct blk_plug_cb *bcb)
+{
+	struct virtblk_plug_cb *cb =
+		container_of(bcb, struct virtblk_plug_cb, cb);
+
+	virtqueue_notify(cb->vblk->vq);
+	kfree(cb);
+}
+
+static bool virtblk_plugged(struct virtio_blk *vblk)
+{
+	struct blk_plug *plug = current->plug;
+	struct virtblk_plug_cb *cb;
+
+	if (!plug)
+		return false;
+
+	list_for_each_entry(cb, &plug->cb_list, cb.list) {
+		if (cb->cb.callback == virtblk_unplug && cb->vblk == vblk)
+			return true;
+	}
+
+	/* Not currently on the callback list */
+	cb = kmalloc(sizeof(*cb), GFP_ATOMIC);
+	if (!cb)
+		return false;
+
+	cb->vblk = vblk;
+	cb->cb.callback = virtblk_unplug;
+	list_add(&cb->cb.list, &plug->cb_list);
+	return true;
+}
+
+static void 

Re: [PATCH 5/5] virtio-blk: implement ->make_request

2011-10-05 Thread Rusty Russell
On Wed, 05 Oct 2011 15:54:08 -0400, Christoph Hellwig h...@infradead.org wrote:
> Add an alternate I/O path that implements ->make_request for virtio-blk.
> This is required for high IOPS devices which get slowed down to 1/5th of
> the native speed by all the locking, memory allocation and other overhead
> in the request-based I/O path.

Ouch.

I'd be tempted to just switch across to this, though I'd be interested
to see if the simple add_buf change I referred to before has some effect
by itself (I doubt it).

Also, though it's overkill, I'd use standard list primitives rather than
open-coding a singly linked list.

Thanks!
Rusty.