This splits the read/write functions into top and bottom halves. It doesn't
change much yet, but this is a requirement for async IO. The start funtion
turns the request into an ioinfo (to be completed eventually by a thread) and
the finish function retrives the result. (for now, we just do the work in
finish.)

seems to work, but could probably use a little more load testing.

Index: virtio.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
retrieving revision 1.47
diff -u -p -r1.47 virtio.c
--- virtio.c    27 May 2017 23:58:16 -0000      1.47
+++ virtio.c    30 May 2017 10:56:47 -0000
@@ -62,6 +62,14 @@ int nr_vioblk;
 #define VMMCI_F_ACK            (1<<1)
 #define VMMCI_F_SYNCRTC                (1<<2)
 
+struct ioinfo {
+       uint8_t *buf;
+       ssize_t len;
+       off_t offset;
+       int fd;
+       int error;
+};
+
 const char *
 vioblk_cmd_name(uint32_t type)
 {
@@ -324,35 +332,87 @@ vioblk_update_qs(struct vioblk_dev *dev)
        dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
 }
 
-static char *
-vioblk_do_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
+static void
+vioblk_free_info(struct ioinfo *info)
 {
-       char *buf;
+       if (!info)
+               return;
+       free(info->buf);
+       free(info);
+}
 
-       buf = malloc(sz);
-       if (buf == NULL) {
-               log_warn("malloc errror vioblk read");
-               return (NULL);
-       }
+static struct ioinfo *
+vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz)
+{
+       struct ioinfo *info;
+
+       info = calloc(1, sizeof(*info));
+       if (!info)
+               goto nomem;
+       info->buf = malloc(sz);
+       if (info->buf == NULL)
+               goto nomem;
+       info->len = sz;
+       info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
+       info->fd = dev->fd;
+
+       return info;
 
-       if (pread(dev->fd, buf, sz, sector * VIRTIO_BLK_SECTOR_SIZE) != sz) {
+nomem:
+       free(info);
+       log_warn("malloc errror vioblk read");
+       return (NULL);
+}
+
+
+static const uint8_t *
+vioblk_finish_read(struct ioinfo *info)
+{
+       if (pread(info->fd, info->buf, info->len, info->offset) != info->len) {
+               info->error = errno;
                log_warn("vioblk read error");
-               free(buf);
-               return (NULL);
+               return NULL;
        }
 
-       return buf;
+       return info->buf;
+}
+
+static struct ioinfo *
+vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, size_t 
len)
+{
+       struct ioinfo *info;
+
+       info = calloc(1, sizeof(*info));
+       if (!info)
+               goto nomem;
+       info->buf = malloc(len);
+       if (info->buf == NULL)
+               goto nomem;
+       info->len = len;
+       info->offset = sector * VIRTIO_BLK_SECTOR_SIZE;
+       info->fd = dev->fd;
+
+       if (read_mem(addr, info->buf, len)) {
+               vioblk_free_info(info);
+               return NULL;
+       }
+
+       return info;
+
+nomem:
+       free(info);
+       log_warn("malloc errror vioblk write");
+       return (NULL);
 }
 
 static int
-vioblk_do_write(struct vioblk_dev *dev, off_t sector, char *buf, ssize_t sz)
+vioblk_finish_write(struct ioinfo *info)
 {
-       if (pwrite(dev->fd, buf, sz, sector * VIRTIO_BLK_SECTOR_SIZE) != sz) {
+       if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) {
                log_warn("vioblk write error");
-               return (1);
+               return EIO;
        }
-
-       return (0);
+       return 0;
 }
 
 /*
@@ -368,7 +428,7 @@ vioblk_notifyq(struct vioblk_dev *dev)
        uint8_t ds;
        int ret;
        off_t secbias;
-       char *vr, *secdata;
+       char *vr;
        struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
        struct vring_avail *avail;
        struct vring_used *used;
@@ -441,14 +501,16 @@ vioblk_notifyq(struct vioblk_dev *dev)
 
                        secbias = 0;
                        do {
-                               /* read the data (use current data descriptor) 
*/
-                               /*
-                                * XXX waste to malloc secdata in vioblk_do_read
-                                * and free it here over and over
-                                */
-                               secdata = vioblk_do_read(dev, cmd.sector + 
secbias,
+                               struct ioinfo *info;
+                               const uint8_t *secdata;
+
+                               info = vioblk_start_read(dev, cmd.sector + 
secbias,
                                    (ssize_t)secdata_desc->len);
+
+                               /* read the data (use current data descriptor) 
*/
+                               secdata = vioblk_finish_read(info);
                                if (secdata == NULL) {
+                                       vioblk_free_info(info);
                                        log_warnx("vioblk: block read error, "
                                            "sector %lld", cmd.sector);
                                        goto out;
@@ -460,11 +522,11 @@ vioblk_notifyq(struct vioblk_dev *dev)
                                            "data to gpa @ 0x%llx",
                                            secdata_desc->addr);
                                        dump_descriptor_chain(desc, 
cmd_desc_idx);
-                                       free(secdata);
+                                       vioblk_free_info(info);
                                        goto out;
                                }
 
-                               free(secdata);
+                               vioblk_free_info(info);
 
                                secbias += (secdata_desc->len / 
VIRTIO_BLK_SECTOR_SIZE);
                                secdata_desc_idx = secdata_desc->next &
@@ -514,35 +576,31 @@ vioblk_notifyq(struct vioblk_dev *dev)
                                goto out;
                        }
 
-                       secdata = NULL;
                        secbias = 0;
                        do {
-                               secdata = realloc(secdata, secdata_desc->len);
-                               if (secdata == NULL) {
-                                       log_warn("wr vioblk: malloc error, "
-                                           "len %d", secdata_desc->len);
-                                       goto out;
-                               }
+                               struct ioinfo *info;
 
-                               if (read_mem(secdata_desc->addr, secdata,
-                                   secdata_desc->len)) {
+                               info = vioblk_start_write(dev, cmd.sector + 
secbias,
+                                   secdata_desc->addr, secdata_desc->len);
+
+                               if (info == NULL) {
                                        log_warnx("wr vioblk: can't read "
                                            "sector data @ 0x%llx",
                                            secdata_desc->addr);
                                        dump_descriptor_chain(desc,
                                            cmd_desc_idx);
-                                       free(secdata);
                                        goto out;
                                }
 
-                               if (vioblk_do_write(dev, cmd.sector + secbias,
-                                   secdata, (ssize_t)secdata_desc->len)) {
+                               if (vioblk_finish_write(info)) {
                                        log_warnx("wr vioblk: disk write "
                                            "error");
-                                       free(secdata);
+                                       vioblk_free_info(info);
                                        goto out;
                                }
 
+                               vioblk_free_info(info);
+
                                secbias += secdata_desc->len /
                                    VIRTIO_BLK_SECTOR_SIZE;
 
@@ -550,8 +608,6 @@ vioblk_notifyq(struct vioblk_dev *dev)
                                    VIOBLK_QUEUE_MASK;
                                secdata_desc = &desc[secdata_desc_idx];
                        } while (secdata_desc->flags & VRING_DESC_F_NEXT);
-
-                       free(secdata);
 
                        ds_desc_idx = secdata_desc_idx;
                        ds_desc = secdata_desc;
Index: vm.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.18
diff -u -p -r1.18 vm.c
--- vm.c        28 May 2017 23:56:13 -0000      1.18
+++ vm.c        30 May 2017 10:54:53 -0000
@@ -1230,9 +1230,10 @@ find_gpa_range(struct vm_create_params *
  *      exist in the guest.
  */
 int
-write_mem(paddr_t dst, void *buf, size_t len)
+write_mem(paddr_t dst, const void *buf, size_t len)
 {
-       char *from = buf, *to;
+       const char *from = buf;
+       char *to;
        size_t n, off;
        struct vm_mem_range *vmr;
 
Index: vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.54
diff -u -p -r1.54 vmd.h
--- vmd.h       4 May 2017 19:41:58 -0000       1.54
+++ vmd.h       30 May 2017 10:55:05 -0000
@@ -289,7 +289,7 @@ uint32_t vm_priv_addr(struct address *, 
 /* vmm.c */
 void    vmm(struct privsep *, struct privsep_proc *);
 void    vmm_shutdown(void);
-int     write_mem(paddr_t, void *buf, size_t);
+int     write_mem(paddr_t, const void *buf, size_t);
 int     read_mem(paddr_t, void *buf, size_t);
 int     opentap(char *);
 int     fd_hasdata(int);

Reply via email to