On Tue, Jun 18, 2024 at 8:19 PM Sahil Siddiq <icegambi...@gmail.com> wrote: > > This is the first patch in a series to add support for packed > virtqueues in vhost_shadow_virtqueue. This patch implements the > insertion of available buffers in the descriptor area. It takes > into account descriptor chains, but does not consider indirect > descriptors. > > VhostShadowVirtqueue has also been modified so it acts as a layer > of abstraction for split and packed virtqueues. > > Signed-off-by: Sahil Siddiq <sahil...@proton.me> > --- > Hi, > > I am currently working on adding support for packed virtqueues in > vhost_shadow_virtqueue [1]. This patch only implements the insertion of > available buffers in the descriptor area. It does not take into > account indirect descriptors, event_idx or notifications. > > I don't think these changes are testable yet but I thought I would > still post this patch for feedback. The following email annotates these > changes with a few comments and questions that I have. > > Thanks, > Sahil >
Hi Sahil, Just some nitpicks here and there, > [1] https://wiki.qemu.org/Internships/ProjectIdeas/PackedShadowVirtqueue > > hw/virtio/vhost-shadow-virtqueue.c | 124 ++++++++++++++++++++++++++++- > hw/virtio/vhost-shadow-virtqueue.h | 66 ++++++++++----- > 2 files changed, 167 insertions(+), 23 deletions(-) > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c > b/hw/virtio/vhost-shadow-virtqueue.c > index fc5f408f77..e3b276a9e9 100644 > --- a/hw/virtio/vhost-shadow-virtqueue.c > +++ b/hw/virtio/vhost-shadow-virtqueue.c > @@ -217,6 +217,122 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue > *svq, > return true; > } > > +/** > + * Write descriptors to SVQ packed vring > + * > + * @svq: The shadow virtqueue > + * @sg: Cache for hwaddr > + * @out_sg: The iovec from the guest that is read-only for device > + * @out_num: iovec length > + * @in_sg: The iovec from the guest that is write-only for device > + * @in_num: iovec length > + * @head_flags: flags for first descriptor in list > + * > + * Return true if success, false otherwise and print error. > + */ > +static bool vhost_svq_vring_write_descs_packed(VhostShadowVirtqueue *svq, > hwaddr *sg, > + const struct iovec *out_sg, size_t > out_num, > + const struct iovec *in_sg, size_t > in_num, > + uint16_t *head_flags) > +{ > + uint16_t id, curr, head, i; > + unsigned n; > + struct vring_packed_desc *descs = svq->vring_packed.vring.desc; > + bool ok; > + > + head = svq->vring_packed.next_avail_idx; > + i = head; > + id = svq->free_head; > + curr = id; > + > + size_t num = out_num + in_num; > + > + if (num == 0) { > + return true; > + } num == 0 is impossible now, the caller checks for that. 
> + > + ok = vhost_svq_translate_addr(svq, sg, out_sg, out_num); > + if (unlikely(!ok)) { > + return false; > + } > + > + ok = vhost_svq_translate_addr(svq, sg + out_num, in_sg, in_num); > + if (unlikely(!ok)) { > + return false; > + } > + > + for (n = 0; n < num; n++) { > + uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags | > + (n < out_num ? 0 : VRING_DESC_F_WRITE) | > + (n + 1 == num ? 0 : VRING_DESC_F_NEXT)); > + if (i == head) { > + *head_flags = flags; > + } else { > + descs[i].flags = flags; > + } > + > + descs[i].addr = cpu_to_le64(sg[n]); > + descs[i].id = id; > + if (n < out_num) { > + descs[i].len = cpu_to_le32(out_sg[n].iov_len); > + } else { > + descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len); > + } > + > + curr = cpu_to_le16(svq->desc_next[curr]); > + > + if (++i >= svq->vring_packed.vring.num) { > + i = 0; > + svq->vring_packed.avail_used_flags ^= > + 1 << VRING_PACKED_DESC_F_AVAIL | > + 1 << VRING_PACKED_DESC_F_USED; > + } > + } > + > + if (i <= head) { > + svq->vring_packed.avail_wrap_counter ^= 1; > + } > + > + svq->vring_packed.next_avail_idx = i; > + svq->free_head = curr; > + return true; > +} > + > +static bool vhost_svq_add_packed(VhostShadowVirtqueue *svq, > + const struct iovec *out_sg, size_t out_num, > + const struct iovec *in_sg, size_t in_num, > + unsigned *head) > +{ > + bool ok; > + uint16_t head_flags = 0; > + g_autofree hwaddr *sgs = g_new(hwaddr, out_num + in_num); > + > + *head = svq->vring_packed.next_avail_idx; > + > + /* We need some descriptors here */ > + if (unlikely(!out_num && !in_num)) { > + qemu_log_mask(LOG_GUEST_ERROR, > + "Guest provided element with no descriptors"); > + return false; > + } > + > + ok = vhost_svq_vring_write_descs_packed(svq, sgs, out_sg, out_num, > + in_sg, in_num, &head_flags); > + if (unlikely(!ok)) { > + return false; > + } > + Ok, now I see why you switched the sgs length from MAX to the sum of out_num + in_num. But if we're here, why not just embed all of vhost_svq_vring_write_descs_packed here? 
vhost_svq_vring_write_descs makes sense to split as we repeat the operation, but I think it adds nothing here. What do you think? > + /* > + * A driver MUST NOT make the first descriptor in the list > + * available before all subsequent descriptors comprising > + * the list are made available. > + */ > + smp_wmb(); > + svq->vring_packed.vring.desc[*head].flags = head_flags; > + > + return true; > +} > + > static void vhost_svq_kick(VhostShadowVirtqueue *svq) > { > bool needs_kick; > @@ -258,7 +374,13 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const > struct iovec *out_sg, > return -ENOSPC; > } > > - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, > &qemu_head); > + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) { > + ok = vhost_svq_add_packed(svq, out_sg, out_num, > + in_sg, in_num, &qemu_head); > + } else { > + ok = vhost_svq_add_split(svq, out_sg, out_num, > + in_sg, in_num, &qemu_head); > + } > if (unlikely(!ok)) { > return -EINVAL; > } > diff --git a/hw/virtio/vhost-shadow-virtqueue.h > b/hw/virtio/vhost-shadow-virtqueue.h > index 19c842a15b..ee1a87f523 100644 > --- a/hw/virtio/vhost-shadow-virtqueue.h > +++ b/hw/virtio/vhost-shadow-virtqueue.h > @@ -46,10 +46,53 @@ typedef struct VhostShadowVirtqueueOps { > VirtQueueAvailCallback avail_handler; > } VhostShadowVirtqueueOps; > > +struct vring_packed { > + /* Actual memory layout for this queue. */ > + struct { > + unsigned int num; > + struct vring_packed_desc *desc; > + struct vring_packed_desc_event *driver; > + struct vring_packed_desc_event *device; > + } vring; > + > + /* Avail used flags. */ > + uint16_t avail_used_flags; > + > + /* Index of the next avail descriptor. 
*/ > + uint16_t next_avail_idx; > + > + /* Driver ring wrap counter */ > + bool avail_wrap_counter; > +}; > + > /* Shadow virtqueue to relay notifications */ > typedef struct VhostShadowVirtqueue { > + /* Virtio queue shadowing */ > + VirtQueue *vq; > + > + /* Virtio device */ > + VirtIODevice *vdev; > + > + /* SVQ vring descriptors state */ > + SVQDescState *desc_state; > + > + /* > + * Backup next field for each descriptor so we can recover securely, not > + * needing to trust the device access. > + */ > + uint16_t *desc_next; > + > + /* Next free descriptor */ > + uint16_t free_head; > + > + /* Size of SVQ vring free descriptors */ > + uint16_t num_free; > + Why reorder all of the previous members? Keeping them in their original position would make the diff smaller and easier to review. Apart from the nitpicks I don't see anything wrong with this part of the project. Looking forward to the next! Thanks! > /* Shadow vring */ > - struct vring vring; > + union { > + struct vring vring; > + struct vring_packed vring_packed; > + }; > > /* Shadow kick notifier, sent to vhost */ > EventNotifier hdev_kick; > @@ -69,27 +112,12 @@ typedef struct VhostShadowVirtqueue { > /* Guest's call notifier, where the SVQ calls guest. */ > EventNotifier svq_call; > > - /* Virtio queue shadowing */ > - VirtQueue *vq; > - > - /* Virtio device */ > - VirtIODevice *vdev; > - > /* IOVA mapping */ > VhostIOVATree *iova_tree; > > - /* SVQ vring descriptors state */ > - SVQDescState *desc_state; > - > /* Next VirtQueue element that guest made available */ > VirtQueueElement *next_guest_avail_elem; > - > - /* > - * Backup next field for each descriptor so we can recover securely, not > - * needing to trust the device access. 
> - */ > - uint16_t *desc_next; > - > /* Caller callbacks */ > const VhostShadowVirtqueueOps *ops; > > @@ -99,17 +127,11 @@ typedef struct VhostShadowVirtqueue { > /* Next head to expose to the device */ > uint16_t shadow_avail_idx; > > - /* Next free descriptor */ > - uint16_t free_head; > - > /* Last seen used idx */ > uint16_t shadow_used_idx; > > /* Next head to consume from the device */ > uint16_t last_used_idx; > - > - /* Size of SVQ vring free descriptors */ > - uint16_t num_free; > } VhostShadowVirtqueue; > > bool vhost_svq_valid_features(uint64_t features, Error **errp); > -- > 2.45.2 >