On Mon, Sep 18, 2017 at 10:48:36AM +1000, David Gwynne wrote:
> vmd currently performs disk io for the guest synchronously in the
> same thread the guest is running in. it also uses bounce buffers
> in between the guest's "physical" memory and the reads and writes
> against the disk image.
> 
> this diff moves the vioblk ring processing into a taskq, ie, another
> thread. this allows the guest to run while vmd is doing the reads
> and writes against the disk image. currently vmd only creates a
> single taskq for all vioblk devices to use. when the guest posts
> a queue notify write, vmd simply task_adds the ring processing
> to the taskq. when the ring is processed, vmd updates the completion
> queue and posts an interrupt to the guest.
> 
> this diff also takes advantage of the new vaddr_mem and iovec_mem
> APIs to avoid using bounce buffers between the guest and read/writes.
> when the guest configures a ring address, vaddr_mem is used to get
> direct access to the ring in vmd's address space. reads and writes
> by the guest use iovec_mem to fill out an iovec array, which is
> then passed directly to preadv and pwritev.
> 
> because ring processing and io are now performed on a different
> thread to the guest's vcpu, memory ordering becomes a consideration.
> this also adds the use of membar_consumer and membar_producer from
> sys/atomic.h to ensure the ring updates become visible to the guest
> in the correct order.
> 
> i would appreciate tests, particularly with vmd on i386 so i can
> know if the atomic api is available there.
> 

vmd on i386 needs to be brought up to date again with amd64. Nothing
special, just need to spend a few hours diffing trees. For now though,
let's not let that block moving forward here.

> the vm.c chunk is a result of testing with seabios, which likes to
> place the ring at the end of the physical address space.
> 
> ok?
> 
> Index: Makefile
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
> retrieving revision 1.16
> diff -u -p -r1.16 Makefile
> --- Makefile  3 Jul 2017 22:21:47 -0000       1.16
> +++ Makefile  18 Sep 2017 00:39:00 -0000
> @@ -7,6 +7,7 @@ SRCS=         vmd.c control.c log.c priv.c proc
>  SRCS+=               vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
>  SRCS+=               ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c 
> packet.c
>  SRCS+=               parse.y atomicio.c
> +SRCS+=               task.c
>  
>  CFLAGS+=     -Wall -I${.CURDIR}
>  CFLAGS+=     -Wstrict-prototypes -Wmissing-prototypes
> Index: virtio.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 virtio.c
> --- virtio.c  17 Sep 2017 23:07:56 -0000      1.54
> +++ virtio.c  18 Sep 2017 00:39:00 -0000
> @@ -18,6 +18,7 @@
>  
>  #include <sys/param.h>       /* PAGE_SIZE */
>  #include <sys/socket.h>
> +#include <sys/atomic.h> /* membars */
>  
>  #include <machine/vmmvar.h>
>  #include <dev/pci/pcireg.h>
> @@ -43,14 +44,48 @@
>  #include "virtio.h"
>  #include "loadfile.h"
>  #include "atomicio.h"
> +#include "task.h"
> +
> +#ifndef MIN
> +#define MIN(_a, _b)  ((_a) < (_b) ? (_a) : (_b))
> +#endif
> +
> +#ifndef nitems
> +#define nitems(_a)   (sizeof(_a) / sizeof((_a)[0]))
> +#endif
>  
>  extern char *__progname;
>  
> +struct vioblk_queue {
> +     struct vioblk_dev       *dev;
> +     void                    *ring;
> +     struct virtio_vq_info    vq;
> +     struct task              t;
> +     struct event             ev;
> +};
> +
> +struct vioblk_dev {
> +     struct virtio_io_cfg cfg;
> +
> +     struct vioblk_queue q[VIRTIO_MAX_QUEUES];
> +
> +     int fd;
> +     uint64_t sz;
> +     uint32_t max_xfer;
> +
> +     uint32_t vm_id;
> +     int irq;
> +
> +     uint8_t pci_id;
> +};
> +
>  struct viornd_dev viornd;
>  struct vioblk_dev *vioblk;
>  struct vionet_dev *vionet;
>  struct vmmci_dev vmmci;
>  
> +struct taskq *iotq;
> +
>  int nr_vionet;
>  int nr_vioblk;
>  
> @@ -62,13 +97,12 @@ int nr_vioblk;
>  #define VMMCI_F_ACK          (1<<1)
>  #define VMMCI_F_SYNCRTC              (1<<2)
>  
> -struct ioinfo {
> -     uint8_t *buf;
> -     ssize_t len;
> -     off_t offset;
> -     int fd;
> -     int error;
> -};
> +int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> +int vioblk_dump(int);
> +int vioblk_restore(int, struct vm_create_params *, int *);
> +void vioblk_update_qs(struct vioblk_dev *);
> +void vioblk_update_qa(struct vioblk_dev *);
> +int vioblk_notifyq(struct vioblk_dev *);
>  
>  const char *
>  vioblk_cmd_name(uint32_t type)
> @@ -85,6 +119,7 @@ vioblk_cmd_name(uint32_t type)
>       }
>  }
>  
> +#if 0
>  static void
>  dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
>  {
> @@ -108,6 +143,7 @@ dump_descriptor_chain(struct vring_desc 
>           desc[dxx].flags,
>           desc[dxx].next);
>  }
> +#endif
>  
>  static const char *
>  virtio_reg_name(uint8_t reg)
> @@ -323,7 +359,10 @@ vioblk_update_qa(struct vioblk_dev *dev)
>       if (dev->cfg.queue_select > 0)
>               return;
>  
> -     dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> +     dev->q[dev->cfg.queue_select].vq.qa = dev->cfg.queue_address;
> +     dev->q[dev->cfg.queue_select].ring = vaddr_mem(
> +         dev->cfg.queue_address * VIRTIO_PAGE_SIZE,
> +         vring_size(VIOBLK_QUEUE_SIZE));
>  }
>  
>  void
> @@ -336,375 +375,184 @@ vioblk_update_qs(struct vioblk_dev *dev)
>       }
>  
>       /* Update queue address/size based on queue select */
> -     dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> -     dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> +     dev->cfg.queue_address = dev->q[dev->cfg.queue_select].vq.qa;
> +     dev->cfg.queue_size = dev->q[dev->cfg.queue_select].vq.qs;
>  }
>  
> -static void
> -vioblk_free_info(struct ioinfo *info)
> +static int
> +vioblk_complete(struct vring_desc *desc, uint8_t ds)
>  {
> -     if (!info)
> -             return;
> -     free(info->buf);
> -     free(info);
> +     if (write_mem(desc->addr, &ds, MIN(desc->len, sizeof(ds)))) {
> +             log_warnx("can't write device status data @ "
> +                 "0x%llx", desc->addr);
> +     }
> +
> +     return (0);
>  }
>  
> -static struct ioinfo *
> -vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
> +static int
> +vioblk_io_skip(struct vring_desc *vring, struct vring_desc *desc)
>  {
> -     struct ioinfo *info;
> +     unsigned int idx;
>  
> -     info = calloc(1, sizeof(*info));
> -     if (!info)
> -             goto nomem;
> -     info->buf = malloc(sz);
> -     if (info->buf == NULL)
> -             goto nomem;
> -     info->len = sz;
> -     info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
> -     info->fd = dev->fd;
> -
> -     return info;
> +     do {
> +             idx = desc->next & VIOBLK_QUEUE_MASK;
> +             desc = &vring[idx];
> +     } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
>  
> -nomem:
> -     free(info);
> -     log_warn("malloc errror vioblk read");
> -     return (NULL);
> +     return (vioblk_complete(desc, VIRTIO_BLK_S_IOERR));
>  }
>  
> -
> -static const uint8_t *
> -vioblk_finish_read(struct ioinfo *info)
> -{
> -     if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
> -             info->error = errno;
> -             log_warn("vioblk read error");
> -             return NULL;
> +static int
> +vioblk_io(struct vioblk_dev *dev,
> +    ssize_t (*piov)(int, const struct iovec *, int, off_t),
> +    const struct virtio_blk_req_hdr *hdr,
> +    struct vring_desc *desc,
> +    struct vring_desc *vring)
> +{
> +     struct iovec iov[128];
> +     int cnt, iovcnt = 0;
> +     unsigned int idx;
> +     ssize_t rv;
> +
> +     idx = desc->next & VIOBLK_QUEUE_MASK;
> +     desc = &vring[idx];
> +
> +     if (!ISSET(desc->flags, VRING_DESC_F_NEXT)) {
> +             log_warnx("unchained vioblk data descriptor "
> +                 "received (idx %u)", idx);
> +             return (-1);
>       }
>  
> -     return info->buf;
> -}
> -
> -static struct ioinfo *
> -vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, 
> size_t len)
> -{
> -     struct ioinfo *info;
> -
> -     info = calloc(1, sizeof(*info));
> -     if (!info)
> -             goto nomem;
> -     info->buf = malloc(len);
> -     if (info->buf == NULL)
> -             goto nomem;
> -     info->len = len;
> -     info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
> -     info->fd = dev->fd;
> -
> -     if (read_mem(addr, info->buf, len)) {
> -             vioblk_free_info(info);
> -             return NULL;
> -     }
> +     do {
> +             cnt = iovec_mem(desc->addr, desc->len,
> +                 iov + iovcnt, nitems(iov) - iovcnt);
> +             if (cnt == -1) {
> +                     log_warnx("invalid dma address 0x%016llx",
> +                         desc->addr);
> +                     return vioblk_io_skip(vring, desc);
> +             }
> +
> +             iovcnt += cnt;
> +             if (iovcnt == nitems(iov)) {
> +                     log_warnx("%s: iov is too small", __func__);
> +                     return vioblk_io_skip(vring, desc);
> +             }
> +
> +             idx = desc->next & VIOBLK_QUEUE_MASK;
> +             desc = &vring[idx];
> +     } while (ISSET(desc->flags, VRING_DESC_F_NEXT));
>  
> -     return info;
> +     do {
> +             rv = (*piov)(dev->fd, iov, iovcnt,
> +                 hdr->sector * VIRTIO_BLK_SECTOR_SIZE);
> +     } while (rv == -1 && errno == EINTR);
>  
> -nomem:
> -     free(info);
> -     log_warn("malloc errror vioblk write");
> -     return (NULL);
> -}
> +     if (rv == -1)
> +             log_warn("boo hiss @ %llu", hdr->sector);
>  
> -static int
> -vioblk_finish_write(struct ioinfo *info)
> -{
> -     if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
> -             log_warn("vioblk write error");
> -             return EIO;
> -     }
> -     return 0;
> +     return vioblk_complete(desc,
> +         rv == -1 ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK);
>  }
>  
>  /*
>   * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
>   * XXX cant trust ring data from VM, be extra cautious.
>   */
> -int
> -vioblk_notifyq(struct vioblk_dev *dev)
> +static void
> +vioblk_q(void *arg)
>  {
> -     uint64_t q_gpa;
> -     uint32_t vr_sz;
> -     uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
> -     uint8_t ds;
> -     int ret;
> -     off_t secbias;
> +     struct vioblk_queue *queue = arg;
> +     struct vioblk_dev *dev = queue->dev;
> +     uint16_t cmd_desc_idx, desc_idx;
>       char *vr;
> -     struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
> +     struct vring_desc *desc, *cmd_desc;
>       struct vring_avail *avail;
>       struct vring_used *used;
>       struct virtio_blk_req_hdr cmd;
> +     unsigned int prod, cons;
> +     uint8_t ds;
>  
> -     ret = 0;
> -
> -     /* Invalid queue? */
> -     if (dev->cfg.queue_notify > 0)
> -             return (0);
> -
> -     vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
> -     q_gpa = dev->vq[dev->cfg.queue_notify].qa;
> -     q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> -
> -     vr = calloc(1, vr_sz);
> -     if (vr == NULL) {
> -             log_warn("calloc error getting vioblk ring");
> -             return (0);
> -     }
> -
> -     if (read_mem(q_gpa, vr, vr_sz)) {
> -             log_warnx("error reading gpa 0x%llx", q_gpa);
> -             goto out;
> -     }
> +     vr = queue->ring;
> +     if (vr == NULL)
> +             return;
>  
>       /* Compute offsets in ring of descriptors, avail ring, and used ring */
>       desc = (struct vring_desc *)(vr);
> -     avail = (struct vring_avail *)(vr +
> -         dev->vq[dev->cfg.queue_notify].vq_availoffset);
> -     used = (struct vring_used *)(vr +
> -         dev->vq[dev->cfg.queue_notify].vq_usedoffset);
> +     avail = (struct vring_avail *)(vr + queue->vq.vq_availoffset);
> +     used = (struct vring_used *)(vr + queue->vq.vq_usedoffset);
>  
> -     idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
> +     cons = queue->vq.last_avail & VIOBLK_QUEUE_MASK;
> +     prod = avail->idx & VIOBLK_QUEUE_MASK;
>  
> -     if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
> -             log_warnx("vioblk queue notify - nothing to do?");
> -             goto out;
> -     }
> -
> -     while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
> +     if (cons == prod)
> +             return;
>  
> -             cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
> +     membar_consumer();
> +     do {
> +             cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK;
>               cmd_desc = &desc[cmd_desc_idx];
>  
> -             if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
> +             if (!ISSET(cmd_desc->flags, VRING_DESC_F_NEXT)) {
>                       log_warnx("unchained vioblk cmd descriptor received "
>                           "(idx %d)", cmd_desc_idx);
> -                     goto out;
> +                     break;
>               }
>  
>               /* Read command from descriptor ring */
>               if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) {
>                       log_warnx("vioblk: command read_mem error @ 0x%llx",
>                           cmd_desc->addr);
> -                     goto out;
> +                     break;
>               }
>  
>               switch (cmd.type) {
>               case VIRTIO_BLK_T_IN:
> -                     /* first descriptor */
> -                     secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> -                     secdata_desc = &desc[secdata_desc_idx];
> -
> -                     if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
> -                             log_warnx("unchained vioblk data descriptor "
> -                                 "received (idx %d)", cmd_desc_idx);
> -                             goto out;
> -                     }
> -
> -                     secbias = 0;
> -                     do {
> -                             struct ioinfo *info;
> -                             const uint8_t *secdata;
> -
> -                             info = vioblk_start_read(dev, cmd.sector + 
> secbias,
> -                                 (ssize_t)secdata_desc->len);
> -
> -                             /* read the data (use current data descriptor) 
> */
> -                             secdata = vioblk_finish_read(info);
> -                             if (secdata == NULL) {
> -                                     vioblk_free_info(info);
> -                                     log_warnx("vioblk: block read error, "
> -                                         "sector %lld", cmd.sector);
> -                                     goto out;
> -                             }
> -
> -                             if (write_mem(secdata_desc->addr, secdata,
> -                                 secdata_desc->len)) {
> -                                     log_warnx("can't write sector "
> -                                         "data to gpa @ 0x%llx",
> -                                         secdata_desc->addr);
> -                                     dump_descriptor_chain(desc, 
> cmd_desc_idx);
> -                                     vioblk_free_info(info);
> -                                     goto out;
> -                             }
> -
> -                             vioblk_free_info(info);
> -
> -                             secbias += (secdata_desc->len / 
> VIRTIO_BLK_SECTOR_SIZE);
> -                             secdata_desc_idx = secdata_desc->next &
> -                                 VIOBLK_QUEUE_MASK;
> -                             secdata_desc = &desc[secdata_desc_idx];
> -                     } while (secdata_desc->flags & VRING_DESC_F_NEXT);
> -
> -                     ds_desc_idx = secdata_desc_idx;
> -                     ds_desc = secdata_desc;
> -
> -                     ds = VIRTIO_BLK_S_OK;
> -                     if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> -                             log_warnx("can't write device status data @ "
> -                                 "0x%llx", ds_desc->addr);
> -                             dump_descriptor_chain(desc, cmd_desc_idx);
> -                             goto out;
> -                     }
> -
> -                     ret = 1;
> -                     dev->cfg.isr_status = 1;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].id = 
> cmd_desc_idx;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].len = 
> cmd_desc->len;
> -                     used->idx++;
> -
> -                     dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> -                         VIOBLK_QUEUE_MASK;
> +                     if (vioblk_io(dev, preadv, &cmd, cmd_desc, desc) != 0)
> +                             goto fail;
>  
> -                     if (write_mem(q_gpa, vr, vr_sz)) {
> -                             log_warnx("vioblk: error writing vio ring");
> -                     }
>                       break;
>               case VIRTIO_BLK_T_OUT:
> -                     secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> -                     secdata_desc = &desc[secdata_desc_idx];
> -
> -                     if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) {
> -                             log_warnx("wr vioblk: unchained vioblk data "
> -                                 "descriptor received (idx %d)",
> -                                 cmd_desc_idx);
> -                             goto out;
> -                     }
> -
> -                     if (secdata_desc->len > dev->max_xfer) {
> -                             log_warnx("%s: invalid read size %d requested",
> -                                 __func__, secdata_desc->len);
> -                             goto out;
> -                     }
> -
> -                     secbias = 0;
> -                     do {
> -                             struct ioinfo *info;
> -
> -                             info = vioblk_start_write(dev, cmd.sector + 
> secbias,
> -                                 secdata_desc->addr, secdata_desc->len);
> -
> -                             if (info == NULL) {
> -                                     log_warnx("wr vioblk: can't read "
> -                                         "sector data @ 0x%llx",
> -                                         secdata_desc->addr);
> -                                     dump_descriptor_chain(desc,
> -                                         cmd_desc_idx);
> -                                     goto out;
> -                             }
> -
> -                             if (vioblk_finish_write(info)) {
> -                                     log_warnx("wr vioblk: disk write "
> -                                         "error");
> -                                     vioblk_free_info(info);
> -                                     goto out;
> -                             }
> -
> -                             vioblk_free_info(info);
> -
> -                             secbias += secdata_desc->len /
> -                                 VIRTIO_BLK_SECTOR_SIZE;
> -
> -                             secdata_desc_idx = secdata_desc->next &
> -                                 VIOBLK_QUEUE_MASK;
> -                             secdata_desc = &desc[secdata_desc_idx];
> -                     } while (secdata_desc->flags & VRING_DESC_F_NEXT);
> -
> -                     ds_desc_idx = secdata_desc_idx;
> -                     ds_desc = secdata_desc;
> -
> -                     ds = VIRTIO_BLK_S_OK;
> -                     if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> -                             log_warnx("wr vioblk: can't write device "
> -                                 "status data @ 0x%llx", ds_desc->addr);
> -                             dump_descriptor_chain(desc, cmd_desc_idx);
> -                             goto out;
> -                     }
> +                     if (vioblk_io(dev, pwritev, &cmd, cmd_desc, desc) != 0)
> +                             goto fail;
>  
> -                     ret = 1;
> -                     dev->cfg.isr_status = 1;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> -                         cmd_desc_idx;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> -                         cmd_desc->len;
> -                     used->idx++;
> -
> -                     dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> -                         VIOBLK_QUEUE_MASK;
> -                     if (write_mem(q_gpa, vr, vr_sz))
> -                             log_warnx("wr vioblk: error writing vio ring");
>                       break;
> +
>               case VIRTIO_BLK_T_FLUSH:
>               case VIRTIO_BLK_T_FLUSH_OUT:
> -                     ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> -                     ds_desc = &desc[ds_desc_idx];
> -
>                       ds = VIRTIO_BLK_S_OK;
> -                     if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> -                             log_warnx("fl vioblk: can't write device status 
> "
> -                                 "data @ 0x%llx", ds_desc->addr);
> -                             dump_descriptor_chain(desc, cmd_desc_idx);
> -                             goto out;
> -                     }
> +                     if (fsync(dev->fd) == -1)
> +                             ds = VIRTIO_BLK_S_IOERR;
>  
> -                     ret = 1;
> -                     dev->cfg.isr_status = 1;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> -                         cmd_desc_idx;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> -                         cmd_desc->len;
> -                     used->idx++;
> +                     desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> +                     desc = &desc[desc_idx];
>  
> -                     dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> -                         VIOBLK_QUEUE_MASK;
> -                     if (write_mem(q_gpa, vr, vr_sz)) {
> -                             log_warnx("fl vioblk: error writing vio ring");
> -                     }
> +                     vioblk_complete(desc, ds);
>                       break;
> +
>               default:
>                       log_warnx("%s: unsupported command 0x%x", __func__,
>                           cmd.type);
>  
> -                     ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
> -                     ds_desc = &desc[ds_desc_idx];
> +                     vioblk_io_skip(desc, cmd_desc);
> +                     break;
> +             }
>  
> -                     ds = VIRTIO_BLK_S_UNSUPP;
> -                     if (write_mem(ds_desc->addr, &ds, ds_desc->len)) {
> -                             log_warnx("%s: get id : can't write device "
> -                                 "status data @ 0x%llx", __func__,
> -                                 ds_desc->addr);
> -                             dump_descriptor_chain(desc, cmd_desc_idx);
> -                             goto out;
> -                     }
> +             used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
> +             used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
> +             membar_producer();
> +             used->idx++;
>  
> -                     ret = 1;
> -                     dev->cfg.isr_status = 1;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].id =
> -                         cmd_desc_idx;
> -                     used->ring[used->idx & VIOBLK_QUEUE_MASK].len =
> -                         cmd_desc->len;
> -                     used->idx++;
> +             cons++;
> +             cons &= VIOBLK_QUEUE_MASK;
> +     } while (cons != prod);
>  
> -                     dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> -                         VIOBLK_QUEUE_MASK;
> -                     if (write_mem(q_gpa, vr, vr_sz)) {
> -                             log_warnx("%s: get id : error writing vio ring",
> -                                 __func__);
> -                     }
> -                     break;
> -             }
> +fail:
> +     queue->vq.last_avail = cons;
>  
> -             idx = (idx + 1) & VIOBLK_QUEUE_MASK;
> -     }
> -out:
> -     free(vr);
> -     return (ret);
> +     vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
> +
> +     dev->cfg.isr_status = 1;
>  }
>  
>  int
> @@ -737,8 +585,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
>                       break;
>               case VIRTIO_CONFIG_QUEUE_NOTIFY:
>                       dev->cfg.queue_notify = *data;
> -                     if (vioblk_notifyq(dev))
> -                             *intr = 1;
> +                     task_add(iotq, &dev->q[0].t);
>                       break;
>               case VIRTIO_CONFIG_DEVICE_STATUS:
>                       dev->cfg.device_status = *data;
> @@ -752,7 +599,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
>                               dev->cfg.queue_select = 0;
>                               dev->cfg.queue_notify = 0;
>                               dev->cfg.isr_status = 0;
> -                             dev->vq[0].last_avail = 0;
> +                             dev->q[0].vq.last_avail = 0;
>                       }
>                       break;
>               default:
> @@ -1710,6 +1557,10 @@ virtio_init(struct vmd_vm *vm, int *chil
>                       return;
>               }
>  
> +             iotq = taskq_create("iotq");
> +             if (iotq == NULL)
> +                     fatalx("unable to create io taskq");
> +
>               /* One virtio block device for each disk defined in vcp */
>               for (i = 0; i < vcp->vcp_ndisks; i++) {
>                       if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1)
> @@ -1731,18 +1582,26 @@ virtio_init(struct vmd_vm *vm, int *chil
>                                   "device", __progname);
>                               return;
>                       }
> -                     vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
> -                     vioblk[i].vq[0].vq_availoffset =
> +
> +                     vioblk[i].q[0].dev = &vioblk[i];
> +                     vioblk[i].q[0].ring = NULL;
> +                     vioblk[i].q[0].vq.qs = VIOBLK_QUEUE_SIZE;
> +                     vioblk[i].q[0].vq.vq_availoffset =
>                           sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
> -                     vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
> +                     vioblk[i].q[0].vq.vq_usedoffset = VIRTQUEUE_ALIGN(
>                           sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
>                           + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
> -                     vioblk[i].vq[0].last_avail = 0;
> +                     vioblk[i].q[0].vq.last_avail = 0;
>                       vioblk[i].fd = child_disks[i];
>                       vioblk[i].sz = sz / 512;
>                       vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
>                       vioblk[i].max_xfer = 1048576;
> +
> +                     vioblk[i].vm_id = vcp->vcp_id;
> +                     vioblk[i].irq = pci_get_dev_irq(id);
>                       vioblk[i].pci_id = id;
> +
> +                     task_set(&vioblk[i].q[0].t, vioblk_q, &vioblk[i].q[0]);
>               }
>       }
>  
> Index: virtio.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 virtio.h
> --- virtio.h  17 Sep 2017 23:07:56 -0000      1.21
> +++ virtio.h  18 Sep 2017 00:39:00 -0000
> @@ -99,18 +99,6 @@ struct viornd_dev {
>       uint8_t pci_id;
>  };
>  
> -struct vioblk_dev {
> -     struct virtio_io_cfg cfg;
> -
> -     struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
> -
> -     int fd;
> -     uint64_t sz;
> -     uint32_t max_xfer;
> -
> -     uint8_t pci_id;
> -};
> -
>  struct vionet_dev {
>       pthread_mutex_t mutex;
>       struct event event;
> @@ -179,13 +167,6 @@ int viornd_restore(int);
>  void viornd_update_qs(void);
>  void viornd_update_qa(void);
>  int viornd_notifyq(void);
> -
> -int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> -int vioblk_dump(int);
> -int vioblk_restore(int, struct vm_create_params *, int *);
> -void vioblk_update_qs(struct vioblk_dev *);
> -void vioblk_update_qa(struct vioblk_dev *);
> -int vioblk_notifyq(struct vioblk_dev *);
>  
>  int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
>  int vionet_dump(int);
> Index: vm.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 vm.c
> --- vm.c      17 Sep 2017 23:07:56 -0000      1.27
> +++ vm.c      18 Sep 2017 00:39:00 -0000
> @@ -1578,7 +1578,7 @@ vaddr_mem(paddr_t gpa, size_t len)
>               if (gpa < vmr->vmr_gpa)
>                       continue;
>  
> -             if (gpend >= vmr->vmr_gpa + vmr->vmr_size)
> +             if (gpend > vmr->vmr_gpa + vmr->vmr_size)
>                       continue;
>  
>               return ((char *)vmr->vmr_va + (gpa - vmr->vmr_gpa));

Reply via email to