On Mon, Sep 18, 2017 at 10:48:36AM +1000, David Gwynne wrote:
> vmd currently performs disk io for the guest synchronously in the
> same thread the guest is running in. it also uses bounce buffers
> in between the guests "physical" memory and the read and writes
> against the disk image.
>
> this diff moves the vioblk ring processing into a taskq, ie, another
> thread. this allows the guest to run while vmd is doing the reads
> and writes against the disk image. currently vmd only creates a
> single taskq for all vioblk devices to use. when the guest posts
> a queue notify write, vmd simply task_adds the ring processing
> to the taskq. when the ring is processed, vmd updates the completion
> queue and posts an interrupt to the guest.
>
> this diff also takes advantage of the new vaddr_mem and iovec_mem
> APIs to avoid using bounce buffers between the guest and read/writes.
> when the guest configures a ring address, vaddr_mem is used to get
> direct access to the ring in vmds address space. reads and writes
> by the guest use iovec_mem to fill out an iovec array, which is
> then passed directly to preadv and pwritev.
>
> because ring and io is now performed on a different thread to the
> guest's vcpu, memory ordering becomes a consideration. this also adds
> the use of membar_consumer and membar_producer from sys/atomic.h
> to ensure the ring updates become visible to the guest in the
> correct order.
>
> i would appreciate tests, particularly with vmd on i386 so i can
> know if the atomic api is available there.
>
vmd on i386 needs to be brought up to date again with amd64. Nothing
special, just need to spend a few hours diffing trees. For now though,
let's not let that block moving forward here.
> the vm.c chunk is a result of testing with seabios, which likes to
> place the ring at the end of the physical address space.
>
> ok?
>
> Index: Makefile
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
> retrieving revision 1.16
> diff -u -p -r1.16 Makefile
> --- Makefile 3 Jul 2017 22:21:47 -0000 1.16
> +++ Makefile 18 Sep 2017 00:39:00 -0000
> @@ -7,6 +7,7 @@ SRCS= vmd.c control.c log.c priv.c proc
> SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
> SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c
> packet.c
> SRCS+= parse.y atomicio.c
> +SRCS+= task.c
>
> CFLAGS+= -Wall -I${.CURDIR}
> CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes
> Index: virtio.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 virtio.c
> --- virtio.c 17 Sep 2017 23:07:56 -0000 1.54
> +++ virtio.c 18 Sep 2017 00:39:00 -0000
> @@ -18,6 +18,7 @@
>
> #include <sys/param.h> /* PAGE_SIZE */
> #include <sys/socket.h>
> +#include <sys/atomic.h> /* membars */
>
> #include <machine/vmmvar.h>
> #include <dev/pci/pcireg.h>
> @@ -43,14 +44,48 @@
> #include "virtio.h"
> #include "loadfile.h"
> #include "atomicio.h"
> +#include "task.h"
> +
> +#ifndef MIN
> +#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b))
> +#endif
> +
> +#ifndef nitems
> +#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
> +#endif
>
> extern char *__progname;
>
> +struct vioblk_queue {
> + struct vioblk_dev *dev;
> + void *ring;
> + struct virtio_vq_info vq;
> + struct task t;
> + struct event ev;
> +};
> +
> +struct vioblk_dev {
> + struct virtio_io_cfg cfg;
> +
> + struct vioblk_queue q[VIRTIO_MAX_QUEUES];
> +
> + int fd;
> + uint64_t sz;
> + uint32_t max_xfer;
> +
> + uint32_t vm_id;
> + int irq;
> +
> + uint8_t pci_id;
> +};
> +
> struct viornd_dev viornd;
> struct vioblk_dev *vioblk;
> struct vionet_dev *vionet;
> struct vmmci_dev vmmci;
>
> +struct taskq *iotq;
> +
> int nr_vionet;
> int nr_vioblk;
>
> @@ -62,13 +97,12 @@ int nr_vioblk;
> #define VMMCI_F_ACK (1<<1)
> #define VMMCI_F_SYNCRTC (1<<2)
>
> -struct ioinfo {
> - uint8_t *buf;
> - ssize_t len;
> - off_t offset;
> - int fd;
> - int error;
> -};
> +int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> +int vioblk_dump(int);
> +int vioblk_restore(int, struct vm_create_params *, int *);
> +void vioblk_update_qs(struct vioblk_dev *);
> +void vioblk_update_qa(struct vioblk_dev *);
> +int vioblk_notifyq(struct vioblk_dev *);
>
> const char *
> vioblk_cmd_name(uint32_t type)
> @@ -85,6 +119,7 @@ vioblk_cmd_name(uint32_t type)
> }
> }
>
> +#if 0
> static void
> dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
> {
> @@ -108,6 +143,7 @@ dump_descriptor_chain(struct vring_desc
> desc[dxx].flags,
> desc[dxx].next);
> }
> +#endif
>
> static const char *
> virtio_reg_name(uint8_t reg)
> @@ -323,7 +359,10 @@ vioblk_update_qa(struct vioblk_dev *dev)
> if (dev->cfg.queue_select > 0)
> return;
>
> - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> + dev->q[dev->cfg.queue_select].vq.qa = dev->cfg.queue_address;
> + dev->q[dev->cfg.queue_select].ring = vaddr_mem(
> + dev->cfg.queue_address * VIRTIO_PAGE_SIZE,
> + vring_size(VIOBLK_QUEUE_SIZE));
> }
>
> void
> @@ -336,375 +375,184 @@ vioblk_update_qs(struct vioblk_dev *dev)
> }
>
> /* Update queue address/size based on queue select */
> - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> + dev->cfg.queue_address = dev->q[dev->cfg.queue_select].vq.qa;
> + dev->cfg.queue_size = dev->q[dev->cfg.queue_select].vq.qs;
> }
>
> -static void
> -vioblk_free_info(struct ioinfo *info)
> +static int
> +vioblk_complete(struct vring_desc *desc, uint8_t ds)
> {
> - if (!info)
> - return;
> - free(info->buf);
> - free(info);
> + if (write_mem(desc->addr, &ds, MIN(desc->len, sizeof(ds)))) {
> + log_warnx("can't write device status data @ "
> + "0x%llx", desc->addr);
> + }
> +
> + return (0);
> }
>
> -static struct ioinfo *
> -vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
> +static int
> +vioblk_io_skip(struct vring_desc *vring, struct vring_desc *desc)
> {
> - struct ioinfo *info;
> + unsigned int idx;
>
> - info = calloc(1, sizeof(*info));
> - if (!info)
> - goto nomem;
> - info->buf = malloc(sz);
> - if (info->buf == NULL)
> - goto nomem;
> - info->len = sz;
> - info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
> - info->fd = dev->fd;
> -
> - return info;
> + do {
> + idx = desc->next & VIOBLK_QUEUE_MASK;
> + desc = &vring[idx];
> + } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
>
> -nomem:
> - free(info);
> - log_warn("malloc errror vioblk read");
> - return (NULL);
> + return (vioblk_complete(desc, VIRTIO_BLK_S_IOERR));
> }
>
> -
> -static const uint8_t *
> -vioblk_finish_read(struct ioinfo *info)
> -{
> - if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
> - info->error = errno;
> - log_warn("vioblk read error");
> - return NULL;
> +static int
> +vioblk_io(struct vioblk_dev *dev,
> + ssize_t (*piov)(int, const struct iovec *, int, off_t),
> + const struct virtio_blk_req_hdr *hdr,
> + struct vring_desc *desc,
> + struct vring_desc *vring)
> +{
> + struct iovec iov[128];
> + int cnt, iovcnt = 0;
> + unsigned int idx;
> + ssize_t rv;
> +
> + idx = desc->next & VIOBLK_QUEUE_MASK;
> + desc = &vring[idx];
> +
> + if (!ISSET(desc->flags, VRING_DESC_F_NEXT)) {
> + log_warnx("unchained vioblk data descriptor "
> + "received (idx %u)", idx);
> + return (-1);
> }
>
> - return info->buf;
> -}
> -
> -static struct ioinfo *
> -vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr,
> size_t len)
> -{
> - struct ioinfo *info;
> -
> - info = calloc(1, sizeof(*info));
> - if (!info)
> - goto nomem;
> - info->buf = malloc(len);
> - if (info->buf == NULL)
> - goto nomem;
> - info->len = len;
> - info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
> - info->fd = dev->fd;
> -
> - if (read_mem(addr, info->buf, len)) {
> - vioblk_free_info(info);
> - return NULL;
> - }
> + do {
> + cnt = iovec_mem(desc->addr, desc->len,
> + iov + iovcnt, nitems(iov) - iovcnt);
> + if (cnt == -1) {
> + log_warnx("invalid dma address 0x%016llx",
> + desc->addr);
> + return vioblk_io_skip(vring, desc);
> + }
> +
> + iovcnt += cnt;
> + if (iovcnt == nitems(iov)) {
> + log_warnx("%s: iov is too small", __func__);
> + return vioblk_io_skip(vring, desc);
> + }
> +
> + idx = desc->next & VIOBLK_QUEUE_MASK;
> + desc = &vring[idx];
> + } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
>
> - return info;
> + do {
> + rv = (*piov)(dev->fd, iov, iovcnt,
> + hdr->sector * VIRTIO_BLK_SECTOR_SIZE);
> + } while (rv == -1 && errno == EINTR);
>
> -nomem:
> - free(info);
> - log_warn("malloc errror vioblk write");
> - return (NULL);
> -}
> + if (rv == -1)
> + log_warn("boo hiss @ %llu", hdr->sector);
>
> -static int
> -vioblk_finish_write(struct ioinfo *info)
> -{
> - if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
> - log_warn("vioblk write error");
> - return EIO;
> - }
> - return 0;
> + return vioblk_complete(desc,
> + rv == -1 ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK);
> }
>
> /*
> * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
> * XXX cant trust ring data from VM, be extra cautious.
> */
> -int
> -vioblk_notifyq(struct vioblk_dev *dev)
> +static void
> +vioblk_q(void *arg)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> - uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
> - uint8_t ds;
> - int ret;
> - off_t secbias;
> + struct vioblk_queue *queue = arg;
> + struct vioblk_dev *dev = queue->dev;
> + uint16_t cmd_desc_idx, desc_idx;
> char *vr;
> - struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
> + struct vring_desc *desc, *cmd_desc;
> struct vring_avail *avail;
> struct vring_used *used;
> struct virtio_blk_req_hdr cmd;
> + unsigned int prod, cons;
> + uint8_t ds;
>
> - ret = 0;
> -
> - /* Invalid queue? */
> - if (dev->cfg.queue_notify > 0)
> - return (0);
> -
> - vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
> - q_gpa = dev->vq[dev->cfg.queue_notify].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> -
> - vr = calloc(1, vr_sz);
> - if (vr == NULL) {
> - log_warn("calloc error getting vioblk ring");
> - return (0);
> - }
> -
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("error reading gpa 0x%llx", q_gpa);
> - goto out;
> - }
> + vr = queue->ring;
> + if (vr == NULL)
> + return;
>
> /* Compute offsets in ring of descriptors, avail ring, and used ring */
> desc = (struct vring_desc *)(vr);
> - avail = (struct vring_avail *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_availoffset);
> - used = (struct vring_used *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_usedoffset);
> + avail = (struct vring_avail *)(vr + queue->vq.vq_availoffset);
> + used = (struct vring_used *)(vr + queue->vq.vq_usedoffset);
>
> - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
> + cons = queue->vq.last_avail & VIOBLK_QUEUE_MASK;
> + prod = avail->idx & VIOBLK_QUEUE_MASK;
>
> - if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
> - log_warnx("vioblk queue notify - nothing to do?");
> - goto out;
> - }
> -
> - while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
> + if (cons == prod)
> + return;
>
> - cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
> + membar_consumer();
> + do {
> + cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK;
> cmd_desc = &desc[cmd_desc_idx];
>
> - if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
> + if (!ISSET(cmd_desc->flags, VRING_DESC_F_NEXT)) {
> log_warnx("unchained vioblk cmd descriptor received "
> "(idx %d)", cmd_desc_idx);
> - goto out;
> + break;
> }
>
> /* Read command from descriptor ring */
> if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
> log_warnx("vioblk: command read_mem error @ 0x%llx",
> cmd_desc->addr);
> - goto out;
> + break;
> }
>
> switch (cmd.type) {
> case VIRTIO_BLK_T_IN:
> - /* first descriptor */
> - secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> - secdata_desc = &desc[secdata_desc_idx];
> -
> - if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
> - log_warnx("unchained vioblk data descriptor "
> - "received (idx %d)", cmd_desc_idx);
> - goto out;
> - }
> -
> - secbias = 0;
> - do {
> - struct ioinfo *info;
> - const uint8_t *secdata;
> -
> - info = vioblk_start_read(dev, cmd.sector +
> secbias,
> - (ssize_t)secdata_desc->len);
> -
> - /* read the data (use current data descriptor)
> */
> - secdata = vioblk_finish_read(info);
> - if (secdata == NULL) {
> - vioblk_free_info(info);
> - log_warnx("vioblk: block read error, "
> - "sector %lld", cmd.sector);
> - goto out;
> - }
> -
> - if (write_mem(secdata_desc->addr, secdata,
> - secdata_desc->len)) {
> - log_warnx("can't write sector "
> - "data to gpa @ 0x%llx",
> - secdata_desc->addr);
> - dump_descriptor_chain(desc,
> cmd_desc_idx);
> - vioblk_free_info(info);
> - goto out;
> - }
> -
> - vioblk_free_info(info);
> -
> - secbias += (secdata_desc->len /
> VIRTIO_BLK_SECTOR_SIZE);
> - secdata_desc_idx = secdata_desc->next &
> - VIOBLK_QUEUE_MASK;
> - secdata_desc = &desc[secdata_desc_idx];
> - } while (secdata_desc->flags & VRING_DESC_F_NEXT);
> -
> - ds_desc_idx = secdata_desc_idx;
> - ds_desc = secdata_desc;
> -
> - ds = VIRTIO_BLK_S_OK;
> - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> - log_warnx("can't write device status data @ "
> - "0x%llx", ds_desc->addr);
> - dump_descriptor_chain(desc, cmd_desc_idx);
> - goto out;
> - }
> -
> - ret = 1;
> - dev->cfg.isr_status = 1;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> cmd_desc_idx;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> cmd_desc->len;
> - used->idx++;
> -
> - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> - VIOBLK_QUEUE_MASK;
> + if (vioblk_io(dev, preadv, &cmd, cmd_desc, desc) != 0)
> + goto fail;
>
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("vioblk: error writing vio ring");
> - }
> break;
> case VIRTIO_BLK_T_OUT:
> - secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> - secdata_desc = &desc[secdata_desc_idx];
> -
> - if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
> - log_warnx("wr vioblk: unchained vioblk data "
> - "descriptor received (idx %d)",
> - cmd_desc_idx);
> - goto out;
> - }
> -
> - if (secdata_desc->len > dev->max_xfer) {
> - log_warnx("%s: invalid read size %d requested",
> - __func__, secdata_desc->len);
> - goto out;
> - }
> -
> - secbias = 0;
> - do {
> - struct ioinfo *info;
> -
> - info = vioblk_start_write(dev, cmd.sector +
> secbias,
> - secdata_desc->addr, secdata_desc->len);
> -
> - if (info == NULL) {
> - log_warnx("wr vioblk: can't read "
> - "sector data @ 0x%llx",
> - secdata_desc->addr);
> - dump_descriptor_chain(desc,
> - cmd_desc_idx);
> - goto out;
> - }
> -
> - if (vioblk_finish_write(info)) {
> - log_warnx("wr vioblk: disk write "
> - "error");
> - vioblk_free_info(info);
> - goto out;
> - }
> -
> - vioblk_free_info(info);
> -
> - secbias += secdata_desc->len /
> - VIRTIO_BLK_SECTOR_SIZE;
> -
> - secdata_desc_idx = secdata_desc->next &
> - VIOBLK_QUEUE_MASK;
> - secdata_desc = &desc[secdata_desc_idx];
> - } while (secdata_desc->flags & VRING_DESC_F_NEXT);
> -
> - ds_desc_idx = secdata_desc_idx;
> - ds_desc = secdata_desc;
> -
> - ds = VIRTIO_BLK_S_OK;
> - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> - log_warnx("wr vioblk: can't write device "
> - "status data @ 0x%llx", ds_desc->addr);
> - dump_descriptor_chain(desc, cmd_desc_idx);
> - goto out;
> - }
> + if (vioblk_io(dev, pwritev, &cmd, cmd_desc, desc) != 0)
> + goto fail;
>
> - ret = 1;
> - dev->cfg.isr_status = 1;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> - cmd_desc_idx;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> - cmd_desc->len;
> - used->idx++;
> -
> - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> - VIOBLK_QUEUE_MASK;
> - if (write_mem(q_gpa, vr, vr_sz))
> - log_warnx("wr vioblk: error writing vio ring");
> break;
> +
> case VIRTIO_BLK_T_FLUSH:
> case VIRTIO_BLK_T_FLUSH_OUT:
> - ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> - ds_desc = &desc[ds_desc_idx];
> -
> ds = VIRTIO_BLK_S_OK;
> - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> - log_warnx("fl vioblk: can't write device status
> "
> - "data @ 0x%llx", ds_desc->addr);
> - dump_descriptor_chain(desc, cmd_desc_idx);
> - goto out;
> - }
> + if (fsync(dev->fd) == -1)
> + ds = VIRTIO_BLK_S_IOERR;
>
> - ret = 1;
> - dev->cfg.isr_status = 1;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> - cmd_desc_idx;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> - cmd_desc->len;
> - used->idx++;
> + desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> + desc = &desc[desc_idx];
>
> - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> - VIOBLK_QUEUE_MASK;
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("fl vioblk: error writing vio ring");
> - }
> + vioblk_complete(desc, ds);
> break;
> +
> default:
> log_warnx("%s: unsupported command 0x%x", __func__,
> cmd.type);
>
> - ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> - ds_desc = &desc[ds_desc_idx];
> + vioblk_io_skip(desc, cmd_desc);
> + break;
> + }
>
> - ds = VIRTIO_BLK_S_UNSUPP;
> - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> - log_warnx("%s: get id : can't write device "
> - "status data @ 0x%llx", __func__,
> - ds_desc->addr);
> - dump_descriptor_chain(desc, cmd_desc_idx);
> - goto out;
> - }
> + used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
> + used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
> + membar_producer();
> + used->idx++;
>
> - ret = 1;
> - dev->cfg.isr_status = 1;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> - cmd_desc_idx;
> - used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> - cmd_desc->len;
> - used->idx++;
> + cons++;
> + cons &= VIOBLK_QUEUE_MASK;
> + } while (cons != prod);
>
> - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> - VIOBLK_QUEUE_MASK;
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: get id : error writing vio ring",
> - __func__);
> - }
> - break;
> - }
> +fail:
> + queue->vq.last_avail = cons;
>
> - idx = (idx + 1) & VIOBLK_QUEUE_MASK;
> - }
> -out:
> - free(vr);
> - return (ret);
> + vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
> +
> + dev->cfg.isr_status = 1;
> }
>
> int
> @@ -737,8 +585,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
> break;
> case VIRTIO_CONFIG_QUEUE_NOTIFY:
> dev->cfg.queue_notify = *data;
> - if (vioblk_notifyq(dev))
> - *intr = 1;
> + task_add(iotq, &dev->q[0].t);
> break;
> case VIRTIO_CONFIG_DEVICE_STATUS:
> dev->cfg.device_status = *data;
> @@ -752,7 +599,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
> dev->cfg.queue_select = 0;
> dev->cfg.queue_notify = 0;
> dev->cfg.isr_status = 0;
> - dev->vq[0].last_avail = 0;
> + dev->q[0].vq.last_avail = 0;
> }
> break;
> default:
> @@ -1710,6 +1557,10 @@ virtio_init(struct vmd_vm *vm, int *chil
> return;
> }
>
> + iotq = taskq_create("iotq");
> + if (iotq == NULL)
> + fatalx("unable to create io taskq");
> +
> /* One virtio block device for each disk defined in vcp */
> for (i = 0; i < vcp->vcp_ndisks; i++) {
> if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
> @@ -1731,18 +1582,26 @@ virtio_init(struct vmd_vm *vm, int *chil
> "device", __progname);
> return;
> }
> - vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
> - vioblk[i].vq[0].vq_availoffset =
> +
> + vioblk[i].q[0].dev = &vioblk[i];
> + vioblk[i].q[0].ring = NULL;
> + vioblk[i].q[0].vq.qs = VIOBLK_QUEUE_SIZE;
> + vioblk[i].q[0].vq.vq_availoffset =
> sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
> - vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
> + vioblk[i].q[0].vq.vq_usedoffset = VIRTQUEUE_ALIGN(
> sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
> + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
> - vioblk[i].vq[0].last_avail = 0;
> + vioblk[i].q[0].vq.last_avail = 0;
> vioblk[i].fd = child_disks[i];
> vioblk[i].sz = sz / 512;
> vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
> vioblk[i].max_xfer = 1048576;
> +
> + vioblk[i].vm_id = vcp->vcp_id;
> + vioblk[i].irq = pci_get_dev_irq(id);
> vioblk[i].pci_id = id;
> +
> + task_set(&vioblk[i].q[0].t, vioblk_q, &vioblk[i].q[0]);
> }
> }
>
> Index: virtio.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 virtio.h
> --- virtio.h 17 Sep 2017 23:07:56 -0000 1.21
> +++ virtio.h 18 Sep 2017 00:39:00 -0000
> @@ -99,18 +99,6 @@ struct viornd_dev {
> uint8_t pci_id;
> };
>
> -struct vioblk_dev {
> - struct virtio_io_cfg cfg;
> -
> - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
> -
> - int fd;
> - uint64_t sz;
> - uint32_t max_xfer;
> -
> - uint8_t pci_id;
> -};
> -
> struct vionet_dev {
> pthread_mutex_t mutex;
> struct event event;
> @@ -179,13 +167,6 @@ int viornd_restore(int);
> void viornd_update_qs(void);
> void viornd_update_qa(void);
> int viornd_notifyq(void);
> -
> -int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> -int vioblk_dump(int);
> -int vioblk_restore(int, struct vm_create_params *, int *);
> -void vioblk_update_qs(struct vioblk_dev *);
> -void vioblk_update_qa(struct vioblk_dev *);
> -int vioblk_notifyq(struct vioblk_dev *);
>
> int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> int vionet_dump(int);
> Index: vm.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 vm.c
> --- vm.c 17 Sep 2017 23:07:56 -0000 1.27
> +++ vm.c 18 Sep 2017 00:39:00 -0000
> @@ -1578,7 +1578,7 @@ vaddr_mem(paddr_t gpa, size_t len)
> if (gpa < vmr->vmr_gpa)
> continue;
>
> - if (gpend >= vmr->vmr_gpa + vmr->vmr_size)
> + if (gpend > vmr->vmr_gpa + vmr->vmr_size)
> continue;
>
> return ((char *)vmr->vmr_va + (gpa - vmr->vmr_gpa));