The branch main has been updated by emaste: URL: https://cgit.FreeBSD.org/src/commit/?id=a28cf86c4171fbd004d02f331026e1a946d85778
commit a28cf86c4171fbd004d02f331026e1a946d85778 Author: Hans Rosenfeld <[email protected]> AuthorDate: 2025-10-28 10:33:42 +0000 Commit: Ed Maste <[email protected]> CommitDate: 2026-03-04 17:30:02 +0000 bhyve/virtio: Rework iovec handling functions for efficiency and clarity Add check_iov_len() to check whether an iovec array covers a certain length without the need to call count_iov() on the whole array first. Garbage-collect the 'seek' argument to buf_to_iov(), used only by virtio-scsi control request handling. The apparent benefit of using it to copy only the final status byte instead of the whole TMF or AN request (25 and 21 bytes, respectively) is dubious at best, given that the extra code to handle this in buf_to_iov() allocates and frees a new iovec array and uses seek_iov(), which traverses the whole array and copies iovecs around. Replace seek_iov() and truncate_iov(), used only by virtio-scsi, with a single function split_iov() which combines the functionality of both in a more efficient way: While seek_iov() always copies all iovecs past the seek offset into a new iovec array, split_iov() works in place and doesn't copy iovecs unless actually necessary. By using split_iov(), we can avoid almost all copying of iovecs in I/O handling code paths of virtio-scsi. Reviewed by: markj Differential Revision: https://reviews.freebsd.org/D53468 --- usr.sbin/bhyve/iov.c | 106 ++++++++++++++++++----------------- usr.sbin/bhyve/iov.h | 18 +++--- usr.sbin/bhyve/net_backends.c | 2 +- usr.sbin/bhyve/pci_virtio_scsi.c | 116 +++++++++++++++++++++++---------------- 4 files changed, 136 insertions(+), 106 deletions(-) diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c index 2bad55267ff3..16d7765b437b 100644 --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama <[email protected]>. * Copyright (c) 2018 Alexander Motin <[email protected]> + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,70 +33,88 @@ #include <sys/types.h> #include <sys/uio.h> +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include "iov.h" -void -seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, - size_t seek) -{ - size_t remainder = 0; - size_t left = seek; - int i, j; - - for (i = 0; i < niov1; i++) { - size_t toseek = MIN(left, iov1[i].iov_len); - left -= toseek; - if (toseek == iov1[i].iov_len) - continue; +/* + * Given an iovec and an offset, split the iovec into two at the offset and + * return a pointer to the beginning of the second iovec. + * + * The caller is responsible for providing an extra iovec in the array for the + * split. That is, there should be space for *niov1 + 1 iovecs in the array. + */ +struct iovec * +split_iov(struct iovec *iov, size_t *niov1, size_t offset, size_t *niov2) +{ + size_t count, resid; - if (left == 0) { - remainder = toseek; + /* Find the iovec entry that contains the offset. */ + resid = offset; + for (count = 0; count < *niov1; count++) { + if (resid < iov[count].iov_len) break; - } + resid -= iov[count].iov_len; } - for (j = i; j < niov1; j++) { - iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; - iov2[j - i].iov_len = iov1[j].iov_len - remainder; - remainder = 0; + if (resid == 0 || count == *niov1) { + /* Easy case, we don't have to split an iovec entry. */ + *niov2 = *niov1 - count; + *niov1 = count; + if (*niov2 == 0) + return (NULL); + return (&iov[count]); } - *niov2 = j - i; + /* The entry iov[count] needs to be split. */ + *niov1 = count + 1; + *niov2 = *niov1 - count; + memmove(&iov[count + 1], &iov[count], sizeof(struct iovec) * (*niov2)); + iov[count].iov_len = resid; + iov[count + 1].iov_base = (char *)iov[count].iov_base + resid; + iov[count + 1].iov_len -= resid; + return (&iov[count + 1]); } size_t -count_iov(const struct iovec *iov, int niov) +count_iov(const struct iovec *iov, size_t niov) { size_t total = 0; - int i; + size_t i; - for (i = 0; i < niov; i++) + for (i = 0; i < niov; i++) { + assert(total <= SIZE_MAX - iov[i].iov_len); total += iov[i].iov_len; + } return (total); } -void -truncate_iov(struct iovec *iov, int *niov, size_t length) +bool +check_iov_len(const struct iovec *iov, size_t niov, size_t len) { - int i; + size_t total = 0; + size_t i; - for (i = 0; i < *niov && length > 0; i++) { - if (length < iov[i].iov_len) - iov[i].iov_len = length; - length -= iov[i].iov_len; + for (i = 0; i < niov; i++) { + assert(total <= SIZE_MAX - iov[i].iov_len); + total += iov[i].iov_len; + if (total >= len) + return (true); } - *niov = i; + + return (false); } ssize_t -iov_to_buf(const struct iovec *iov, int niov, void **buf) +iov_to_buf(const struct iovec *iov, size_t niov, void **buf) { size_t ptr, total; - int i; + size_t i; total = count_iov(iov, niov); *buf = realloc(*buf, total); @@ -111,21 +130,10 @@ iov_to_buf(const struct iovec *iov, int niov, void **buf) } ssize_t -buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, - size_t seek) +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, size_t niov) { - struct iovec *diov; size_t off = 0, len; - int i; - - if (seek > 0) { - int ndiov; - - diov = malloc(sizeof(struct iovec) * niov); - seek_iov(iov, niov, diov, &ndiov, seek); - iov = diov; - niov = ndiov; - } + size_t i; for (i = 0; i < niov && off < buflen; i++) { len = MIN(iov[i].iov_len, buflen - off); @@ -133,9 +141,5 @@ buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, off += len; } - if (seek > 0) - free(diov); - return ((ssize_t)off); } - diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h index aef0a2e1ee49..e14a9bc7e019 100644 --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama <[email protected]>. * Copyright (c) 2018 Alexander Motin <[email protected]> + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,12 +32,15 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, - int *niov2, size_t seek); -void truncate_iov(struct iovec *iov, int *niov, size_t length); -size_t count_iov(const struct iovec *iov, int niov); -ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, - int niov, size_t seek); +#include <stdbool.h> + +/* Number of additional iovecs required for split_iov() */ +#define SPLIT_IOV_ADDL_IOV 2 + +struct iovec *split_iov(struct iovec *, size_t *, size_t, size_t *); +size_t count_iov(const struct iovec *, size_t); +bool check_iov_len(const struct iovec *, size_t, size_t); +ssize_t iov_to_buf(const struct iovec *, size_t, void **); +ssize_t buf_to_iov(const void *, size_t, const struct iovec *, size_t); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c index 95909d1f8ea2..e7421df3492a 100644 --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -198,7 +198,7 @@ tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, - iov, iovcnt, 0); + iov, iovcnt); /* Mark the bounce buffer as empty. */ priv->bbuflen = 0; diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c index bb0b18c51571..5eaf55496467 100644 --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama <[email protected]>. * Copyright (c) 2018 Marcelo Araujo <[email protected]>. + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -120,10 +121,11 @@ struct pci_vtscsi_worker { struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; - struct iovec vsr_iov_in[VTSCSI_MAXSEG]; - int vsr_niov_in; - struct iovec vsr_iov_out[VTSCSI_MAXSEG]; - int vsr_niov_out; + struct iovec vsr_iov[VTSCSI_MAXSEG + SPLIT_IOV_ADDL_IOV]; + struct iovec * vsr_iov_in; + struct iovec * vsr_iov_out; + size_t vsr_niov_in; + size_t vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; }; @@ -230,13 +232,13 @@ static void pci_vtscsi_neg_features(void *, uint64_t); static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); -static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); -static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); -static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, - int, struct iovec *, int); + size_t, struct iovec *, size_t); static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); @@ -353,7 +355,7 @@ pci_vtscsi_get_lun(uint8_t *lun) return (((lun[2] << 8) | lun[3]) & 0x3fff); } -static int +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { @@ -363,7 +365,7 @@ pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, if (bufsize < sizeof(uint32_t)) { WPRINTF("ignoring truncated control request"); - return (0); + return; } type = *(uint32_t *)buf; @@ -371,25 +373,21 @@ pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, if (type == VIRTIO_SCSI_T_TMF) { if (bufsize != sizeof(*tmf)) { WPRINTF("ignoring tmf request with size %zu", bufsize); - return (0); + return; } tmf = (struct pci_vtscsi_ctrl_tmf *)buf; - return (pci_vtscsi_tmf_handle(sc, tmf)); - } - - if (type == VIRTIO_SCSI_T_AN_QUERY) { + pci_vtscsi_tmf_handle(sc, tmf); + } else if (type == VIRTIO_SCSI_T_AN_QUERY) { if (bufsize != sizeof(*an)) { WPRINTF("ignoring AN request with size %zu", bufsize); - return (0); + return; } an = (struct pci_vtscsi_ctrl_an *)buf; - return (pci_vtscsi_an_handle(sc, an)); + pci_vtscsi_an_handle(sc, an); } - - return (0); } -static int +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { @@ -454,46 +452,73 @@ pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); - return (1); } -static int +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, struct pci_vtscsi_ctrl_an *an __unused) { - return (0); } static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, - int niov_in, struct iovec *iov_out, int niov_out) + size_t niov_in, struct iovec *iov_out, size_t niov_out) { struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; - struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + struct iovec *data_iov_in, *data_iov_out; union ctl_io *io; - int data_niov_in, data_niov_out; + size_t data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err, nxferred; - if (count_iov(iov_out, niov_out) < VTSCSI_OUT_HEADER_LEN(sc)) { + /* + * Make sure we got at least enough space for the VirtIO-SCSI + * command headers. If not, return this request immediately. + */ + if (check_iov_len(iov_out, niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with insufficient output"); return (0); } - if (count_iov(iov_in, niov_in) < VTSCSI_IN_HEADER_LEN(sc)) { + + if (check_iov_len(iov_in, niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with incomplete header"); return (0); } - seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, - VTSCSI_IN_HEADER_LEN(sc)); - seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, - VTSCSI_OUT_HEADER_LEN(sc)); + /* + * We have to split the iovec array into a header and data portion each + * for input and output. + * + * We need to start with the output section (at the end of iov) in case + * the iovec covering the final part of the output header needs to be + * split, in which case split_iov() will move all reamaining iovecs up + * by one to make room for a new iovec covering the first part of the + * output data portion. + */ + data_iov_out = split_iov(iov_out, &niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc), &data_niov_out); + + /* + * Similarly, to not overwrite the first iovec of the output section, + * the 2nd call to split_iov() to split the input section must actually + * cover the entire iovec array (both input and the already split output + * sections). + */ + niov_in += niov_out + data_niov_out; + + data_iov_in = split_iov(iov_in, &niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc), &data_niov_in); + + /* + * And of course we now have to adjust data_niov_in accordingly. + */ + data_niov_in -= niov_out + data_niov_out; - truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); @@ -564,7 +589,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, cmd_wr->sense_len); } - buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out); nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); @@ -580,7 +605,7 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) struct vi_req req; void *buf = NULL; size_t bufsize; - int iolen, n; + int n; sc = vsc; @@ -589,14 +614,13 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); - iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen, iov, n, - bufsize - iolen); + pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov((uint8_t *)buf, bufsize, iov, n); /* * Release this chain and handle more */ - vq_relchain(vq, req.idx, iolen); + vq_relchain(vq, req.idx, bufsize); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ free(buf); @@ -614,7 +638,6 @@ pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; - struct iovec iov[VTSCSI_MAXSEG]; struct vi_req vireq; int n; @@ -622,18 +645,17 @@ pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { - n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); + req = calloc(1, sizeof(struct pci_vtscsi_request)); + + n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq); assert(n >= 1 && n <= VTSCSI_MAXSEG); - req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = vireq.idx; req->vsr_queue = q; + req->vsr_iov_in = &req->vsr_iov[0]; req->vsr_niov_in = vireq.readable; + req->vsr_iov_out = &req->vsr_iov[vireq.readable]; req->vsr_niov_out = vireq.writable; - memcpy(req->vsr_iov_in, iov, - req->vsr_niov_in * sizeof(struct iovec)); - memcpy(req->vsr_iov_out, iov + vireq.readable, - req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);
