From: Eugenio Pérez <[email protected]> Some vDPA devices benefit from the VIRTIO_F_IN_ORDER feature. Add support for it to SVQ so QEMU can migrate these devices.
Signed-off-by: Eugenio Pérez <[email protected]> Acked-by: Jason Wang <[email protected]> Reviewed-by: Michael S. Tsirkin <[email protected]> Signed-off-by: Michael S. Tsirkin <[email protected]> Message-Id: <[email protected]> --- hw/virtio/vhost-shadow-virtqueue.c | 137 +++++++++++++++++++++++++++-- hw/virtio/vhost-shadow-virtqueue.h | 36 ++++++-- 2 files changed, 160 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 2d8fc82cc0..60212fcd7b 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -12,11 +12,14 @@ #include "qemu/error-report.h" #include "qapi/error.h" +#include "qemu/iov.h" #include "qemu/main-loop.h" #include "qemu/log.h" #include "qemu/memalign.h" #include "linux-headers/linux/vhost.h" +#define VIRTIO_RING_NOT_IN_BATCH UINT16_MAX + /** * Validate the transport device features that both guests can use with the SVQ * and SVQs can use with the device. @@ -150,7 +153,33 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, static uint16_t vhost_svq_next_desc(const VhostShadowVirtqueue *svq, uint16_t id) { - return svq->desc_state[id].next; + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + return (id == svq->vring.num - 1) ? 0 : ++id; /* wrap at ring end */ + } else { + return svq->desc_state[id].next; + } +} + +/** + * Updates the SVQ free_head member after adding descriptors to the avail ring. + * The new free_head is the next descriptor that SVQ will make available by + * forwarding a new guest descriptor. + * + * @svq Shadow Virtqueue + * @num Number of descriptors added + * @id ID of the last descriptor added to the SVQ avail ring. 
+ */ +static void vhost_svq_update_free_head(VhostShadowVirtqueue *svq, + size_t num, uint16_t id) +{ + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + svq->free_head += num; /* skip the descriptors just added */ + if (svq->free_head >= svq->vring.num) { /* wrap around the ring */ + svq->free_head -= svq->vring.num; + } + } else { /* follow the link saved for the last id */ + svq->free_head = vhost_svq_next_desc(svq, id); + } } /** @@ -202,7 +231,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, i = next; } - svq->free_head = vhost_svq_next_desc(svq, last); + vhost_svq_update_free_head(svq, num, last); return true; } @@ -306,6 +335,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, svq->num_free -= ndescs; svq->desc_state[qemu_head].elem = elem; svq->desc_state[qemu_head].ndescs = ndescs; + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + svq->desc_state[qemu_head].in_bytes = iov_size(in_sg, in_num); + } vhost_svq_kick(svq); return 0; } @@ -401,6 +433,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) { uint16_t *used_idx = &svq->vring.used->idx; + + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER) && + svq->batch_last.id != VIRTIO_RING_NOT_IN_BATCH) { + return true; + } + if (svq->last_used_idx != svq->shadow_used_idx) { return true; } @@ -463,6 +501,47 @@ static uint16_t vhost_svq_get_last_used_split(VhostShadowVirtqueue *svq, return le32_to_cpu(used->ring[last_used].id); } +/* + * Gets the next buffer id and moves forward the used idx, so the next time + * SVQ calls this function will get the next one. IN_ORDER version + * + * @svq: Shadow VirtQueue + * @len: Consumed length by the device. + * + * Return the next descriptor consumed by the device. 
+ */ +static int32_t vhost_svq_get_last_used_split_in_order( + VhostShadowVirtqueue *svq, + uint32_t *len) +{ + unsigned num = svq->vring.num; + const vring_used_t *used = svq->vring.used; + uint16_t last_used = svq->last_used & (num - 1); + uint16_t last_used_idx = svq->last_used_idx & (num - 1); + + if (svq->batch_last.id == VIRTIO_RING_NOT_IN_BATCH) { /* start new batch */ + svq->batch_last.id = le32_to_cpu(used->ring[last_used_idx].id); + svq->batch_last.len = le32_to_cpu(used->ring[last_used_idx].len); + } + + if (unlikely(last_used >= num)) { /* NOTE(review): dead? masked above */ + qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", + svq->vdev->name, last_used); + return -1; + } + + if (svq->batch_last.id == last_used) { /* last id of the batch */ + svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH; + *len = svq->batch_last.len; + } else { /* use the in_bytes saved by vhost_svq_add() */ + *len = svq->desc_state[last_used].in_bytes; + } + + svq->last_used += svq->desc_state[last_used].ndescs; + svq->last_used_idx++; + return last_used; +} + static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, uint16_t num, uint16_t i) { @@ -474,8 +553,8 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, } G_GNUC_WARN_UNUSED_RESULT -static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq, - uint16_t id) +static VirtQueueElement *vhost_svq_detach_buf_split(VhostShadowVirtqueue *svq, + uint16_t id) { uint16_t num = svq->desc_state[id].ndescs; uint16_t last_used_chain = vhost_svq_last_desc_of_chain(svq, num, id); @@ -486,6 +565,33 @@ static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq, return g_steal_pointer(&svq->desc_state[id].elem); } +G_GNUC_WARN_UNUSED_RESULT +static VirtQueueElement *vhost_svq_detach_buf_split_in_order( + VhostShadowVirtqueue *svq, + uint16_t id) +{ + return g_steal_pointer(&svq->desc_state[id].elem); +} + +/* + * Return the descriptor id (and the chain of ids) to the free list + * + * @svq: Shadow Virtqueue + * @id: Id of the buffer to return. 
+ * + * Return the element associated to the buffer if any. + */ +G_GNUC_WARN_UNUSED_RESULT +static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq, + uint16_t id) +{ + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + return vhost_svq_detach_buf_split_in_order(svq, id); + } else { + return vhost_svq_detach_buf_split(svq, id); + } +} + G_GNUC_WARN_UNUSED_RESULT static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len) @@ -498,7 +604,18 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, /* Only get used array entries after they have been exposed by dev */ smp_rmb(); - last_used = vhost_svq_get_last_used_split(svq, len); + + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + int32_t r; + r = vhost_svq_get_last_used_split_in_order(svq, len); + if (r < 0) { /* helper already logged the bad index */ + return NULL; + } + + last_used = r; + } else { + last_used = vhost_svq_get_last_used_split(svq, len); + } if (unlikely(last_used >= svq->vring.num)) { qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", @@ -726,6 +843,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->next_guest_avail_elem = NULL; svq->shadow_avail_idx = 0; svq->shadow_used_idx = 0; + memset(&svq->batch_last, 0, sizeof(svq->batch_last)); + svq->last_used = 0; svq->last_used_idx = 0; svq->vdev = vdev; svq->vq = vq; @@ -742,8 +861,12 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); svq->desc_state = g_new0(SVQDescState, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->desc_state[i].next = i + 1; + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) { + svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH; /* no batch yet */ + } else { /* build the free list links */ + for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->desc_state[i].next = i + 1; + } } } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 
f52c33e650..ec16a1e838 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -24,11 +24,19 @@ typedef struct SVQDescState { */ unsigned int ndescs; - /* - * Backup next field for each descriptor so we can recover securely, not - * needing to trust the device access. - */ - uint16_t next; + union { + /* + * Total length of the available buffer that is writable by the device. + * Only used with IN_ORDER. + */ + uint32_t in_bytes; + + /* + * Backup next field for each descriptor so we can recover securely, not + * needing to trust the device access. Only used without IN_ORDER. + */ + uint16_t next; + }; } SVQDescState; typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; @@ -99,9 +107,25 @@ typedef struct VhostShadowVirtqueue { /* Next head to expose to the device */ uint16_t shadow_avail_idx; - /* Next free descriptor */ + /* + * Next free descriptor. + * + * Without IN_ORDER free_head is used as a linked list head, and + * desc_state[id].next is the next element. + * With IN_ORDER free_head is the next available buffer index. + */ uint16_t free_head; + /* + * Last used element of the processing batch of used descriptors if + * IN_ORDER. + * If SVQ is not processing a batch of descriptors id is set to UINT16_MAX. + */ + vring_used_elem_t batch_last; + + /* Last used id if IN_ORDER and split vq */ + uint16_t last_used; + /* Last seen used idx */ uint16_t shadow_used_idx; -- MST
