Christoph Hellwig wrote:
>> Inspired by the vhost-net implementation, I did an initial prototype of
>> vhost-blk to see if it provides any benefits over QEMU virtio-blk.
>> I haven't handled all the error cases or fixed naming conventions etc.,
>> but the implementation is stable enough to play with. I tried not to
>> deviate from the vhost-net implementation where possible.
>
> Can you also send the qemu side of it?

It's pretty hacky; I based it on an old vhost-net patch from MST for
simplicity. I haven't focused on cleaning it up, and I will re-base it on
MST's latest code once it gets into QEMU.
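
For reference, vhost_blk_start() in the patch below goes through the same
setup sequence as the vhost-net userspace code; roughly (just an outline of
the ioctl order, not compilable code):

    fd = open("/dev/vhost-blk", O_RDWR);
    ioctl(fd, VHOST_SET_OWNER, NULL);
    ioctl(fd, VHOST_SET_MEM_TABLE, mem);     /* guest RAM layout */
    ioctl(fd, VHOST_SET_VRING_NUM, &state);  /* ring size */
    ioctl(fd, VHOST_SET_VRING_BASE, &state); /* last avail index */
    ioctl(fd, VHOST_SET_VRING_ADDR, &addr);  /* desc/avail/used addresses */
    ioctl(fd, VHOST_SET_VRING_KICK, &file);  /* ioeventfd for guest kicks */
    ioctl(fd, VHOST_SET_VRING_CALL, &file);  /* irqfd for completions */
    ioctl(fd, VHOST_NET_SET_BACKEND, &file); /* file.fd = backing image fd */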

Thanks,
Badari

---
hw/virtio-blk.c |  199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 199 insertions(+)

Index: vhost/hw/virtio-blk.c
===================================================================
--- vhost.orig/hw/virtio-blk.c  2010-02-25 16:47:04.000000000 -0500
+++ vhost/hw/virtio-blk.c       2010-03-17 14:07:26.477430740 -0400
@@ -18,6 +18,7 @@
#ifdef __linux__
# include <scsi/sg.h>
#endif
+#include <kvm.h>

typedef struct VirtIOBlock
{
@@ -28,8 +29,13 @@
    char serial_str[BLOCK_SERIAL_STRLEN + 1];
    QEMUBH *bh;
    size_t config_size;
+    uint8_t vhost_started;
} VirtIOBlock;

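+/*
+ * Hack: mirror the first field of the private BDRVRawState from
+ * block/raw-posix.c so that vhost_blk_start() can get at the backing
+ * file descriptor via bs->opaque.
+ */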
+typedef struct BDRVRawState {
+    int fd;
+} BDRVRawState;
+
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    return (VirtIOBlock *)vdev;
@@ -501,6 +507,198 @@
    return 0;
}

+#if 1
+#include "linux/vhost.h"
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include "vhost.h"
+
+int vhost_blk_fd;
+
+struct slot_info {
+        unsigned long phys_addr;
+        unsigned long len;
+        unsigned long userspace_addr;
+        unsigned flags;
+        int logging_count;
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
+
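+/*
+ * Set up the vhost-blk backend in the kernel for virtqueue 0: hand it the
+ * guest memory layout, the vring location, kick/call eventfds and the
+ * backing file descriptor.  Mirrors the vhost-net setup sequence.
+ */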
+static int vhost_blk_start(struct VirtIODevice *vdev)
+{
+       target_phys_addr_t s, l, a;
+       int r, num, idx = 0;
+       struct vhost_vring_state state;
+       struct vhost_vring_file file;
+       struct vhost_vring_addr addr;
+       unsigned long long used_phys;
+       void *desc, *avail, *used;
+       int i, n =0;
+       struct VirtQueue *q = virtio_queue(vdev, idx);
+       VirtIOBlock *vb = to_virtio_blk(vdev);
+       struct vhost_memory *mem;
+       BDRVRawState *st = vb->bs->opaque;
+
+       vhost_blk_fd = open("/dev/vhost-blk", O_RDWR);
+       if (vhost_blk_fd < 0) {
+               fprintf(stderr, "unable to open vhost-blk\n");
+               return -errno;
+       }
+
+       r = ioctl(vhost_blk_fd, VHOST_SET_OWNER, NULL);
+        if (r < 0) {
+               fprintf(stderr, "ioctl VHOST_SET_OWNER failed\n");
+                return -errno;
+       }
+
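+        /*
+         * Build the vhost memory table from the kvm userspace memory slots,
+         * skipping empty slots and slots with dirty logging enabled.
+         */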
+        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+                if (!slots[i].len ||
+                       (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+                                      continue;
+                }
+                ++n;
+        }
+
+        mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+                           n * sizeof(struct vhost_memory_region));
+        if (!mem)
+                return -ENOMEM;
+
+        mem->nregions = n;
+        n = 0;
+        for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+                if (!slots[i].len || (slots[i].flags &
+                       KVM_MEM_LOG_DIRTY_PAGES)) {
+                        continue;
+                }
+                mem->regions[n].guest_phys_addr = slots[i].phys_addr;
+                mem->regions[n].memory_size = slots[i].len;
+                mem->regions[n].userspace_addr = slots[i].userspace_addr;
+                ++n;
+        }
+
+        r = ioctl(vhost_blk_fd, VHOST_SET_MEM_TABLE, mem);
+        if (r < 0)
+                r = -errno;
+        qemu_free(mem);
+        if (r < 0)
+                return r;
+
+       state.index = idx;
+       num = state.num = virtio_queue_get_num(vdev, idx);
+       r = ioctl(vhost_blk_fd, VHOST_SET_VRING_NUM, &state);
+        if (r) {
+               fprintf(stderr, "ioctl VHOST_SET_VRING_NUM failed\n");
+                return -errno;
+        }
+
+       state.num = virtio_queue_last_avail_idx(vdev, idx);
+       r = ioctl(vhost_blk_fd, VHOST_SET_VRING_BASE, &state);
+       if (r) {
+               fprintf(stderr, "ioctl VHOST_SET_VRING_BASE failed\n");
+                return -errno;
+       }
+
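+
+       /*
+        * Map the guest-physical desc/avail/used rings into QEMU's address
+        * space; vhost accesses them through these user virtual addresses.
+        */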
+       s = l = sizeof(struct vring_desc) * num;
+       a = virtio_queue_get_desc(vdev, idx);
+       desc = cpu_physical_memory_map(a, &l, 0);
+       if (!desc || l != s) {
+                r = -ENOMEM;
+                goto fail_alloc;
+       }
+       s = l = offsetof(struct vring_avail, ring) +
+                sizeof(uint16_t) * num;
+        a = virtio_queue_get_avail(vdev, idx);
+        avail = cpu_physical_memory_map(a, &l, 0);
+        if (!avail || l != s) {
+                r = -ENOMEM;
+                goto fail_alloc;
+        }
+        s = l = offsetof(struct vring_used, ring) +
+                sizeof(struct vring_used_elem) * num;
+        used_phys = a = virtio_queue_get_used(vdev, idx);
+        used = cpu_physical_memory_map(a, &l, 1);
+        if (!used || l != s) {
+                r = -ENOMEM;
+                goto fail_alloc;
+        }
+
+       addr.index = idx;
+       addr.desc_user_addr = (u_int64_t)(unsigned long)desc;
+       addr.avail_user_addr = (u_int64_t)(unsigned long)avail;
+       addr.used_user_addr = (u_int64_t)(unsigned long)used;
+       addr.log_guest_addr = used_phys;
+       addr.flags = 0;
+        r = ioctl(vhost_blk_fd, VHOST_SET_VRING_ADDR, &addr);
+        if (r < 0) {
+               fprintf(stderr, "ioctl VHOST_SET_VRING_ADDR failed\n");
+               r = -errno;
+               goto fail_alloc;
+        }
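+
+       /*
+        * Wire up the notifiers: the host notifier (ioeventfd) lets guest
+        * kicks go straight to the kernel, and the guest notifier (irqfd)
+        * lets vhost-blk signal completions without bouncing through QEMU.
+        */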
+       if (!vdev->binding->guest_notifier || !vdev->binding->host_notifier) {
+                fprintf(stderr, "binding does not support irqfd/queuefd\n");
+                r = -ENOSYS;
+                goto fail_alloc;
+        }
+        r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, true);
+        if (r < 0) {
+                fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+                goto fail_guest_notifier;
+        }
+
+        r = vdev->binding->host_notifier(vdev->binding_opaque, idx, true);
+        if (r < 0) {
+                fprintf(stderr, "Error binding host notifier: %d\n", -r);
+                goto fail_host_notifier;
+        }
+
+        file.index = idx;
+        file.fd = event_notifier_get_fd(virtio_queue_host_notifier(q));
+        r = ioctl(vhost_blk_fd, VHOST_SET_VRING_KICK, &file);
+        if (r) {
+                goto fail_kick;
+        }
+
+        file.fd = event_notifier_get_fd(virtio_queue_guest_notifier(q));
+        r = ioctl(vhost_blk_fd, VHOST_SET_VRING_CALL, &file);
+        if (r) {
+                goto fail_call;
+        }
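+
+        /*
+         * Finally point the kernel at the raw image file descriptor; the
+         * prototype reuses the VHOST_NET_SET_BACKEND ioctl number for this.
+         */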
+        file.fd =  st->fd;
+        r = ioctl(vhost_blk_fd, VHOST_NET_SET_BACKEND, &file);
+        if (r) {
+               r = -errno;
+                goto fail_call;
+       }
+       return 0;
+fail_call:
+fail_kick:
+        vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+fail_host_notifier:
+        vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+fail_guest_notifier:
+fail_alloc:
+        return r;
+}
+
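+/*
+ * Start vhost-blk once the guest driver sets DRIVER_OK.  Stopping it again
+ * (e.g. on reset) is not handled in this prototype.
+ */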
+static void virtio_blk_set_status(struct VirtIODevice *vdev)
+{
+       VirtIOBlock *s = to_virtio_blk(vdev);
+
+       if (s->vhost_started)
+               return;
+
+       if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               int r = vhost_blk_start(vdev);
+               if (r < 0) {
+                       fprintf(stderr, "unable to start vhost blk: %d\n", r);
+               } else {
+                       s->vhost_started = 1;
+               }
+       }
+}
+
+#endif
+
VirtIODevice *virtio_blk_init(DeviceState *dev, DriveInfo *dinfo)
{
    VirtIOBlock *s;
@@ -517,6 +715,7 @@
    s->config_size = size;
    s->vdev.get_config = virtio_blk_update_config;
    s->vdev.get_features = virtio_blk_get_features;
+    s->vdev.set_status = virtio_blk_set_status;
    s->vdev.reset = virtio_blk_reset;
    s->bs = dinfo->bdrv;
    s->rq = NULL;



