On Mon, Sep 18, 2017 at 10:48:36AM +1000, David Gwynne wrote:
> vmd currently performs disk io for the guest synchronously in the
> same thread the guest is running in. it also uses bounce buffers
> in between the guests "physical" memory and the read and writes
> against the disk image.
>
> this diff moves the vioblk ring processing into a taskq, ie, another
> thread. this allows the guest to run while vmd is doing the reads
> and writes against the disk image. currently vmd only creates a
> single taskq for all vioblk devices to use. when the guest posts
> and queue notify write, vmd simple task_adds the ring processing
> to the taskq. when the ring is processed, vmd updates the completion
> queue and posts an interrupt to the guest.
>
> this diff also takes advantage of the new vaddr_mem and iovec_mem
> APIs to avoid using bounce buffers between the guest and read/writes.
> when the guest configures a ring address, vaddr_mem is used to get
> direct access to the ring in vmds address space. reads and writes
> by the guest use iovec_mem to fill out an iovec array, which is
> then passed directly to preadv and pwritev.
>
> because ring and io is now performed on a different thread to the
> guests vcpu, memory ordering becomes a considering. this also adds
> the use of membar_consumer and membar_producer from sys/atomic.h
> to ensure the ring updates become visibile to the guest in the
> correct order.
>
> i would appreciate tests, particularly with vmd on i386 so i can
> know if the atomic api is available there.
>
vmd on i386 needs to be brought up to date again with amd64. Nothing
special, just need to spend a few hours diffing trees. For now though,
let's not let that block moving forward here.
> the vm.c chunk is a result of testing with seabios, which likes to
> place the ring at the end of the physical address space.
>
> ok?
>
> Index: Makefile
> ===
> RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
> retrieving revision 1.16
> diff -u -p -r1.16 Makefile
> --- Makefile 3 Jul 2017 22:21:47 - 1.16
> +++ Makefile 18 Sep 2017 00:39:00 -
> @@ -7,6 +7,7 @@ SRCS= vmd.c control.c log.c priv.c proc
> SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
> SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c
> packet.c
> SRCS+= parse.y atomicio.c
> +SRCS+= task.c
>
> CFLAGS+= -Wall -I${.CURDIR}
> CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes
> Index: virtio.c
> ===
> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 virtio.c
> --- virtio.c 17 Sep 2017 23:07:56 - 1.54
> +++ virtio.c 18 Sep 2017 00:39:00 -
> @@ -18,6 +18,7 @@
>
> #include/* PAGE_SIZE */
> #include
> +#include /* membars */
>
> #include
> #include
> @@ -43,14 +44,48 @@
> #include "virtio.h"
> #include "loadfile.h"
> #include "atomicio.h"
> +#include "task.h"
> +
> +#ifndef MIN
> +#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b))
> +#endif
> +
> +#ifndef nitems
> +#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
> +#endif
>
> extern char *__progname;
>
> +struct vioblk_queue {
> + struct vioblk_dev *dev;
> + void*ring;
> + struct virtio_vq_infovq;
> + struct task t;
> + struct event ev;
> +};
> +
> +struct vioblk_dev {
> + struct virtio_io_cfg cfg;
> +
> + struct vioblk_queue q[VIRTIO_MAX_QUEUES];
> +
> + int fd;
> + uint64_t sz;
> + uint32_t max_xfer;
> +
> + uint32_t vm_id;
> + int irq;
> +
> + uint8_t pci_id;
> +};
> +
> struct viornd_dev viornd;
> struct vioblk_dev *vioblk;
> struct vionet_dev *vionet;
> struct vmmci_dev vmmci;
>
> +struct taskq *iotq;
> +
> int nr_vionet;
> int nr_vioblk;
>
> @@ -62,13 +97,12 @@ int nr_vioblk;
> #define VMMCI_F_ACK (1<<1)
> #define VMMCI_F_SYNCRTC (1<<2)
>
> -struct ioinfo {
> - uint8_t *buf;
> - ssize_t len;
> - off_t offset;
> - int fd;
> - int error;
> -};
> +int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
> +int vioblk_dump(int);
> +int vioblk_restore(int, struct vm_create_params *, int *);
> +void vioblk_update_qs(struct vioblk_dev *);
> +void vioblk_update_qa(struct vioblk_dev *);
> +int vioblk_notifyq(struct vioblk_dev *);
>
> const char *
> vioblk_cmd_name(uint32_t type)
> @@ -85,6 +119,7 @@ vioblk_cmd_name(uint32_t type)
> }
> }
>
> +#if 0
> static void
> dump_descriptor_chain(struct vring_desc *desc, int16_t dxx)
> {
> @@ -108,6 +143,7 @@ dump_descriptor_chain(struct vring_desc
> desc[dxx].flags,
> desc[dxx].next);
> }
> +#endif
>
> static const char *
> virtio_reg_name(uint8_t reg)
> @@ -323,7 +359,10 @@ vioblk_update_qa(struct vioblk_dev *dev)
>