vmd currently performs disk io for the guest synchronously in the
same thread the guest is running in. it also uses bounce buffers
in between the guest's "physical" memory and the reads and writes
against the disk image.

this diff moves the vioblk ring processing into a taskq, ie, another
thread. this allows the guest to run while vmd is doing the reads
and writes against the disk image. currently vmd only creates a
single taskq for all vioblk devices to use. when the guest posts
a queue notify write, vmd simply task_adds the ring processing
to the taskq. when the ring is processed, vmd updates the completion
queue and posts an interrupt to the guest.

this diff also takes advantage of the new vaddr_mem and iovec_mem
APIs to avoid using bounce buffers between the guest and read/writes.
when the guest configures a ring address, vaddr_mem is used to get
direct access to the ring in vmd's address space. reads and writes
by the guest use iovec_mem to fill out an iovec array, which is
then passed directly to preadv and pwritev.

because ring processing and io are now performed on a different
thread to the guest's vcpu, memory ordering becomes a consideration.
this diff also adds the use of membar_consumer and membar_producer
from sys/atomic.h to ensure the ring updates become visible to the
guest in the correct order.

i would appreciate tests, particularly with vmd on i386 so i can
know if the atomic api is available there.

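the vm.c chunk is a result of testing with seabios, which likes to
place the ring at the end of the physical address space. the old
">=" comparison in vaddr_mem skipped a memory range when the end of
the request was equal to the end of that range, so it becomes ">".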

ok?

Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
retrieving revision 1.16
diff -u -p -r1.16 Makefile
--- Makefile    3 Jul 2017 22:21:47 -0000       1.16
+++ Makefile    18 Sep 2017 00:39:00 -0000
@@ -7,6 +7,7 @@ SRCS=           vmd.c control.c log.c priv.c proc
 SRCS+=         vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
 SRCS+=         ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c
 SRCS+=         parse.y atomicio.c
+SRCS+=         task.c
 
 CFLAGS+=       -Wall -I${.CURDIR}
 CFLAGS+=       -Wstrict-prototypes -Wmissing-prototypes
Index: virtio.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
retrieving revision 1.54
diff -u -p -r1.54 virtio.c
--- virtio.c    17 Sep 2017 23:07:56 -0000      1.54
+++ virtio.c    18 Sep 2017 00:39:00 -0000
@@ -18,6 +18,7 @@
 
 #include <sys/param.h> /* PAGE_SIZE */
 #include <sys/socket.h>
+#include <sys/atomic.h> /* membars */
 
 #include <machine/vmmvar.h>
 #include <dev/pci/pcireg.h>
@@ -43,14 +44,48 @@
 #include "virtio.h"
 #include "loadfile.h"
 #include "atomicio.h"
+#include "task.h"
+
+#ifndef MIN
+#define MIN(_a, _b)    ((_a) < (_b) ? (_a) : (_b))
+#endif
+
+#ifndef nitems
+#define nitems(_a)     (sizeof(_a) / sizeof((_a)[0]))
+#endif
 
 extern char *__progname;
 
+struct vioblk_queue {
+       struct vioblk_dev       *dev;
+       void                    *ring;
+       struct virtio_vq_info    vq;
+       struct task              t;
+       struct event             ev;
+};
+
+struct vioblk_dev {
+       struct virtio_io_cfg cfg;
+
+       struct vioblk_queue q[VIRTIO_MAX_QUEUES];
+
+       int fd;
+       uint64_t sz;
+       uint32_t max_xfer;
+
+       uint32_t vm_id;
+       int irq;
+
+       uint8_t pci_id;
+};
+
 struct viornd_dev viornd;
 struct vioblk_dev *vioblk;
 struct vionet_dev *vionet;
 struct vmmci_dev vmmci;
 
+struct taskq *iotq;
+
 int nr_vionet;
 int nr_vioblk;
 
@@ -62,13 +97,12 @@ int nr_vioblk;
 #define VMMCI_F_ACK            (1<<1)
 #define VMMCI_F_SYNCRTC                (1<<2)
 
-struct ioinfo {
-       uint8_t *buf;
-       ssize_t len;
-       off_t offset;
-       int fd;
-       int error;
-};
+int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
+int vioblk_dump(int);
+int vioblk_restore(int, struct vm_create_params *, int *);
+void vioblk_update_qs(struct vioblk_dev *);
+void vioblk_update_qa(struct vioblk_dev *);
+int vioblk_notifyq(struct vioblk_dev *);
 
 const char *
 vioblk_cmd_name(uint32_t type)
@@ -85,6 +119,7 @@ vioblk_cmd_name(uint32_t type)
        }
 }
 
+#if 0
 static void
 dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
 {
@@ -108,6 +143,7 @@ dump_descriptor_chain(struct vring_desc 
            desc[dxx].flags,
            desc[dxx].next);
 }
+#endif
 
 static const char *
 virtio_reg_name(uint8_t reg)
@@ -323,7 +359,10 @@ vioblk_update_qa(struct vioblk_dev *dev)
        if (dev->cfg.queue_select > 0)
                return;
 
-       dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+       dev->q[dev->cfg.queue_select].vq.qa = dev->cfg.queue_address;
+       dev->q[dev->cfg.queue_select].ring = vaddr_mem(
+           dev->cfg.queue_address * VIRTIO_PAGE_SIZE,
+           vring_size(VIOBLK_QUEUE_SIZE));
 }
 
 void
@@ -336,375 +375,184 @@ vioblk_update_qs(struct vioblk_dev *dev)
        }
 
        /* Update queue address/size based on queue select */
-       dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
-       dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+       dev->cfg.queue_address = dev->q[dev->cfg.queue_select].vq.qa;
+       dev->cfg.queue_size = dev->q[dev->cfg.queue_select].vq.qs;
 }
 
-static void
-vioblk_free_info(struct ioinfo *info)
+static int
+vioblk_complete(struct vring_desc *desc, uint8_t ds)
 {
-       if (!info)
-               return;
-       free(info->buf);
-       free(info);
+       if (write_mem(desc->addr, &ds, MIN(desc->len, sizeof(ds)))) {
+               log_warnx("can't write device status data @ "
+                   "0x%llx", desc->addr);
+       }
+
+       return (0);
 }
 
-static struct ioinfo *
-vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
+static int
+vioblk_io_skip(struct vring_desc *vring, struct vring_desc *desc)
 {
-       struct ioinfo *info;
+       unsigned int idx;
 
-       info = calloc(1, sizeof(*info));
-       if (!info)
-               goto nomem;
-       info->buf = malloc(sz);
-       if (info->buf == NULL)
-               goto nomem;
-       info->len = sz;
-       info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
-       info->fd = dev->fd;
-
-       return info;
+       do {
+               idx = desc->next & VIOBLK_QUEUE_MASK;
+               desc = &vring[idx];
+       } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
 
-nomem:
-       free(info);
-       log_warn("malloc errror vioblk read");
-       return (NULL);
+       return (vioblk_complete(desc, VIRTIO_BLK_S_IOERR));
 }
 
-
-static const uint8_t *
-vioblk_finish_read(struct ioinfo *info)
-{
-       if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
-               info->error = errno;
-               log_warn("vioblk read error");
-               return NULL;
+static int
+vioblk_io(struct vioblk_dev *dev,
+    ssize_t (*piov)(int, const struct iovec *, int, off_t),
+    const struct virtio_blk_req_hdr *hdr,
+    struct vring_desc *desc,
+    struct vring_desc *vring)
+{
+       struct iovec iov[128];
+       int cnt, iovcnt = 0;
+       unsigned int idx;
+       ssize_t rv;
+
+       idx = desc->next & VIOBLK_QUEUE_MASK;
+       desc = &vring[idx];
+
+       if (!ISSET(desc->flags, VRING_DESC_F_NEXT)) {
+               log_warnx("unchained vioblk data descriptor "
+                   "received (idx %u)", idx);
+               return (-1);
        }
 
-       return info->buf;
-}
-
-static struct ioinfo *
-vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, size_t len)
-{
-       struct ioinfo *info;
-
-       info = calloc(1, sizeof(*info));
-       if (!info)
-               goto nomem;
-       info->buf = malloc(len);
-       if (info->buf == NULL)
-               goto nomem;
-       info->len = len;
-       info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
-       info->fd = dev->fd;
-
-       if (read_mem(addr, info->buf, len)) {
-               vioblk_free_info(info);
-               return NULL;
-       }
+       do {
+               cnt = iovec_mem(desc->addr, desc->len,
+                   iov + iovcnt, nitems(iov) - iovcnt);
+               if (cnt == -1) {
+                       log_warnx("invalid dma address 0x%016llx",
+                           desc->addr);
+                       return vioblk_io_skip(vring, desc);
+               }
+
+               iovcnt += cnt;
+               if (iovcnt == nitems(iov)) {
+                       log_warnx("%s: iov is too small", __func__);
+                       return vioblk_io_skip(vring, desc);
+               }
+
+               idx = desc->next & VIOBLK_QUEUE_MASK;
+               desc = &vring[idx];
+       } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
 
-       return info;
+       do {
+               rv = (*piov)(dev->fd, iov, iovcnt,
+                   hdr->sector * VIRTIO_BLK_SECTOR_SIZE);
+       } while (rv == -1 && errno == EINTR);
 
-nomem:
-       free(info);
-       log_warn("malloc errror vioblk write");
-       return (NULL);
-}
+       if (rv == -1)
+               log_warn("boo hiss @ %llu", hdr->sector);
 
-static int
-vioblk_finish_write(struct ioinfo *info)
-{
-       if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
-               log_warn("vioblk write error");
-               return EIO;
-       }
-       return 0;
+       return vioblk_complete(desc,
+           rv == -1 ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK);
 }
 
 /*
  * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
  * XXX cant trust ring data from VM, be extra cautious.
  */
-int
-vioblk_notifyq(struct vioblk_dev *dev)
+static void
+vioblk_q(void *arg)
 {
-       uint64_t q_gpa;
-       uint32_t vr_sz;
-       uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
-       uint8_t ds;
-       int ret;
-       off_t secbias;
+       struct vioblk_queue *queue = arg;
+       struct vioblk_dev *dev = queue->dev;
+       uint16_t cmd_desc_idx, desc_idx;
        char *vr;
-       struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
+       struct vring_desc *desc, *cmd_desc;
        struct vring_avail *avail;
        struct vring_used *used;
        struct virtio_blk_req_hdr cmd;
+       unsigned int prod, cons;
+       uint8_t ds;
 
-       ret = 0;
-
-       /* Invalid queue? */
-       if (dev->cfg.queue_notify > 0)
-               return (0);
-
-       vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
-       q_gpa = dev->vq[dev->cfg.queue_notify].qa;
-       q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
-
-       vr = calloc(1, vr_sz);
-       if (vr == NULL) {
-               log_warn("calloc error getting vioblk ring");
-               return (0);
-       }
-
-       if (read_mem(q_gpa, vr, vr_sz)) {
-               log_warnx("error reading gpa 0x%llx", q_gpa);
-               goto out;
-       }
+       vr = queue->ring;
+       if (vr == NULL)
+               return;
 
        /* Compute offsets in ring of descriptors, avail ring, and used ring */
        desc = (struct vring_desc *)(vr);
-       avail = (struct vring_avail *)(vr +
-           dev->vq[dev->cfg.queue_notify].vq_availoffset);
-       used = (struct vring_used *)(vr +
-           dev->vq[dev->cfg.queue_notify].vq_usedoffset);
+       avail = (struct vring_avail *)(vr + queue->vq.vq_availoffset);
+       used = (struct vring_used *)(vr + queue->vq.vq_usedoffset);
 
-       idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
+       cons = queue->vq.last_avail & VIOBLK_QUEUE_MASK;
+       prod = avail->idx & VIOBLK_QUEUE_MASK;
 
-       if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
-               log_warnx("vioblk queue notify - nothing to do?");
-               goto out;
-       }
-
-       while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
+       if (cons == prod)
+               return;
 
-               cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
+       membar_consumer();
+       do {
+               cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK;
                cmd_desc = &desc[cmd_desc_idx];
 
-               if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
+               if (!ISSET(cmd_desc->flags, VRING_DESC_F_NEXT)) {
                        log_warnx("unchained vioblk cmd descriptor received "
                            "(idx %d)", cmd_desc_idx);
-                       goto out;
+                       break;
                }
 
                /* Read command from descriptor ring */
                if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
                        log_warnx("vioblk: command read_mem error @ 0x%llx",
                            cmd_desc->addr);
-                       goto out;
+                       break;
                }
 
                switch (cmd.type) {
                case VIRTIO_BLK_T_IN:
-                       /* first descriptor */
-                       secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
-                       secdata_desc = &desc[secdata_desc_idx];
-
-                       if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
-                               log_warnx("unchained vioblk data descriptor "
-                                   "received (idx %d)", cmd_desc_idx);
-                               goto out;
-                       }
-
-                       secbias = 0;
-                       do {
-                               struct ioinfo *info;
-                               const uint8_t *secdata;
-
-                               info = vioblk_start_read(dev, cmd.sector + secbias,
-                                   (ssize_t)secdata_desc->len);
-
-                               /* read the data (use current data descriptor) */
-                               secdata = vioblk_finish_read(info);
-                               if (secdata == NULL) {
-                                       vioblk_free_info(info);
-                                       log_warnx("vioblk: block read error, "
-                                           "sector %lld", cmd.sector);
-                                       goto out;
-                               }
-
-                               if (write_mem(secdata_desc->addr, secdata,
-                                   secdata_desc->len)) {
-                                       log_warnx("can't write sector "
-                                           "data to gpa @ 0x%llx",
-                                           secdata_desc->addr);
-                                       dump_descriptor_chain(desc, cmd_desc_idx);
-                                       vioblk_free_info(info);
-                                       goto out;
-                               }
-
-                               vioblk_free_info(info);
-
-                               secbias += (secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE);
-                               secdata_desc_idx = secdata_desc->next &
-                                   VIOBLK_QUEUE_MASK;
-                               secdata_desc = &desc[secdata_desc_idx];
-                       } while (secdata_desc->flags & VRING_DESC_F_NEXT);
-
-                       ds_desc_idx = secdata_desc_idx;
-                       ds_desc = secdata_desc;
-
-                       ds = VIRTIO_BLK_S_OK;
-                       if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
-                               log_warnx("can't write device status data @ "
-                                   "0x%llx", ds_desc->addr);
-                               dump_descriptor_chain(desc, cmd_desc_idx);
-                               goto out;
-                       }
-
-                       ret = 1;
-                       dev->cfg.isr_status = 1;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
-                       used->idx++;
-
-                       dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
-                           VIOBLK_QUEUE_MASK;
+                       if (vioblk_io(dev, preadv, &cmd, cmd_desc, desc) != 0)
+                               goto fail;
 
-                       if (write_mem(q_gpa, vr, vr_sz)) {
-                               log_warnx("vioblk: error writing vio ring");
-                       }
                        break;
                case VIRTIO_BLK_T_OUT:
-                       secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
-                       secdata_desc = &desc[secdata_desc_idx];
-
-                       if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
-                               log_warnx("wr vioblk: unchained vioblk data "
-                                   "descriptor received (idx %d)",
-                                   cmd_desc_idx);
-                               goto out;
-                       }
-
-                       if (secdata_desc->len > dev->max_xfer) {
-                               log_warnx("%s: invalid read size %d requested",
-                                   __func__, secdata_desc->len);
-                               goto out;
-                       }
-
-                       secbias = 0;
-                       do {
-                               struct ioinfo *info;
-
-                               info = vioblk_start_write(dev, cmd.sector + secbias,
-                                   secdata_desc->addr, secdata_desc->len);
-
-                               if (info == NULL) {
-                                       log_warnx("wr vioblk: can't read "
-                                           "sector data @ 0x%llx",
-                                           secdata_desc->addr);
-                                       dump_descriptor_chain(desc,
-                                           cmd_desc_idx);
-                                       goto out;
-                               }
-
-                               if (vioblk_finish_write(info)) {
-                                       log_warnx("wr vioblk: disk write "
-                                           "error");
-                                       vioblk_free_info(info);
-                                       goto out;
-                               }
-
-                               vioblk_free_info(info);
-
-                               secbias += secdata_desc->len /
-                                   VIRTIO_BLK_SECTOR_SIZE;
-
-                               secdata_desc_idx = secdata_desc->next &
-                                   VIOBLK_QUEUE_MASK;
-                               secdata_desc = &desc[secdata_desc_idx];
-                       } while (secdata_desc->flags & VRING_DESC_F_NEXT);
-
-                       ds_desc_idx = secdata_desc_idx;
-                       ds_desc = secdata_desc;
-
-                       ds = VIRTIO_BLK_S_OK;
-                       if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
-                               log_warnx("wr vioblk: can't write device "
-                                   "status data @ 0x%llx", ds_desc->addr);
-                               dump_descriptor_chain(desc, cmd_desc_idx);
-                               goto out;
-                       }
+                       if (vioblk_io(dev, pwritev, &cmd, cmd_desc, desc) != 0)
+                               goto fail;
 
-                       ret = 1;
-                       dev->cfg.isr_status = 1;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
-                           cmd_desc_idx;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
-                           cmd_desc->len;
-                       used->idx++;
-
-                       dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
-                           VIOBLK_QUEUE_MASK;
-                       if (write_mem(q_gpa, vr, vr_sz))
-                               log_warnx("wr vioblk: error writing vio ring");
                        break;
+
                case VIRTIO_BLK_T_FLUSH:
                case VIRTIO_BLK_T_FLUSH_OUT:
-                       ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
-                       ds_desc = &desc[ds_desc_idx];
-
                        ds = VIRTIO_BLK_S_OK;
-                       if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
-                               log_warnx("fl vioblk: can't write device status "
-                                   "data @ 0x%llx", ds_desc->addr);
-                               dump_descriptor_chain(desc, cmd_desc_idx);
-                               goto out;
-                       }
+                       if (fsync(dev->fd) == -1)
+                               ds = VIRTIO_BLK_S_IOERR;
 
-                       ret = 1;
-                       dev->cfg.isr_status = 1;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
-                           cmd_desc_idx;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
-                           cmd_desc->len;
-                       used->idx++;
+                       desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
+                       desc = &desc[desc_idx];
 
-                       dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
-                           VIOBLK_QUEUE_MASK;
-                       if (write_mem(q_gpa, vr, vr_sz)) {
-                               log_warnx("fl vioblk: error writing vio ring");
-                       }
+                       vioblk_complete(desc, ds);
                        break;
+
                default:
                        log_warnx("%s: unsupported command 0x%x", __func__,
                            cmd.type);
 
-                       ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
-                       ds_desc = &desc[ds_desc_idx];
+                       vioblk_io_skip(desc, cmd_desc);
+                       break;
+               }
 
-                       ds = VIRTIO_BLK_S_UNSUPP;
-                       if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
-                               log_warnx("%s: get id : can't write device "
-                                   "status data @ 0x%llx", __func__,
-                                   ds_desc->addr);
-                               dump_descriptor_chain(desc, cmd_desc_idx);
-                               goto out;
-                       }
+               used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
+               used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
+               membar_producer();
+               used->idx++;
 
-                       ret = 1;
-                       dev->cfg.isr_status = 1;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
-                           cmd_desc_idx;
-                       used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
-                           cmd_desc->len;
-                       used->idx++;
+               cons++;
+               cons &= VIOBLK_QUEUE_MASK;
+       } while (cons != prod);
 
-                       dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
-                           VIOBLK_QUEUE_MASK;
-                       if (write_mem(q_gpa, vr, vr_sz)) {
-                               log_warnx("%s: get id : error writing vio ring",
-                                   __func__);
-                       }
-                       break;
-               }
+fail:
+       queue->vq.last_avail = cons;
 
-               idx = (idx + 1) & VIOBLK_QUEUE_MASK;
-       }
-out:
-       free(vr);
-       return (ret);
+       vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
+
+       dev->cfg.isr_status = 1;
 }
 
 int
@@ -737,8 +585,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
                        break;
                case VIRTIO_CONFIG_QUEUE_NOTIFY:
                        dev->cfg.queue_notify = *data;
-                       if (vioblk_notifyq(dev))
-                               *intr = 1;
+                       task_add(iotq, &dev->q[0].t);
                        break;
                case VIRTIO_CONFIG_DEVICE_STATUS:
                        dev->cfg.device_status = *data;
@@ -752,7 +599,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
                                dev->cfg.queue_select = 0;
                                dev->cfg.queue_notify = 0;
                                dev->cfg.isr_status = 0;
-                               dev->vq[0].last_avail = 0;
+                               dev->q[0].vq.last_avail = 0;
                        }
                        break;
                default:
@@ -1710,6 +1557,10 @@ virtio_init(struct vmd_vm *vm, int *chil
                        return;
                }
 
+               iotq = taskq_create("iotq");
+               if (iotq == NULL)
+                       fatalx("unable to create io taskq");
+
                /* One virtio block device for each disk defined in vcp */
                for (i = 0; i < vcp->vcp_ndisks; i++) {
                        if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
@@ -1731,18 +1582,26 @@ virtio_init(struct vmd_vm *vm, int *chil
                                    "device", __progname);
                                return;
                        }
-                       vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
-                       vioblk[i].vq[0].vq_availoffset =
+
+                       vioblk[i].q[0].dev = &vioblk[i];
+                       vioblk[i].q[0].ring = NULL;
+                       vioblk[i].q[0].vq.qs = VIOBLK_QUEUE_SIZE;
+                       vioblk[i].q[0].vq.vq_availoffset =
                            sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
-                       vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
+                       vioblk[i].q[0].vq.vq_usedoffset = VIRTQUEUE_ALIGN(
                            sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
                            + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
-                       vioblk[i].vq[0].last_avail = 0;
+                       vioblk[i].q[0].vq.last_avail = 0;
                        vioblk[i].fd = child_disks[i];
                        vioblk[i].sz = sz / 512;
                        vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
                        vioblk[i].max_xfer = 1048576;
+
+                       vioblk[i].vm_id = vcp->vcp_id;
+                       vioblk[i].irq = pci_get_dev_irq(id);
                        vioblk[i].pci_id = id;
+
+                       task_set(&vioblk[i].q[0].t, vioblk_q, &vioblk[i].q[0]);
                }
        }
 
Index: virtio.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
retrieving revision 1.21
diff -u -p -r1.21 virtio.h
--- virtio.h    17 Sep 2017 23:07:56 -0000      1.21
+++ virtio.h    18 Sep 2017 00:39:00 -0000
@@ -99,18 +99,6 @@ struct viornd_dev {
        uint8_t pci_id;
 };
 
-struct vioblk_dev {
-       struct virtio_io_cfg cfg;
-
-       struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
-
-       int fd;
-       uint64_t sz;
-       uint32_t max_xfer;
-
-       uint8_t pci_id;
-};
-
 struct vionet_dev {
        pthread_mutex_t mutex;
        struct event event;
@@ -179,13 +167,6 @@ int viornd_restore(int);
 void viornd_update_qs(void);
 void viornd_update_qa(void);
 int viornd_notifyq(void);
-
-int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
-int vioblk_dump(int);
-int vioblk_restore(int, struct vm_create_params *, int *);
-void vioblk_update_qs(struct vioblk_dev *);
-void vioblk_update_qa(struct vioblk_dev *);
-int vioblk_notifyq(struct vioblk_dev *);
 
 int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
 int vionet_dump(int);
Index: vm.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.27
diff -u -p -r1.27 vm.c
--- vm.c        17 Sep 2017 23:07:56 -0000      1.27
+++ vm.c        18 Sep 2017 00:39:00 -0000
@@ -1578,7 +1578,7 @@ vaddr_mem(paddr_t gpa, size_t len)
                if (gpa < vmr->vmr_gpa)
                        continue;
 
-               if (gpend >= vmr->vmr_gpa + vmr->vmr_size)
+               if (gpend > vmr->vmr_gpa + vmr->vmr_size)
                        continue;
 
                return ((char *)vmr->vmr_va + (gpa - vmr->vmr_gpa));

Reply via email to