On Wed, Jul 10, 2024 at 2:56 PM Jonah Palmer <jonah.pal...@oracle.com> wrote:
>
> Add VIRTIO_F_IN_ORDER feature support for the virtqueue_flush operation.
>
> The goal of the virtqueue_ordered_flush operation when the
> VIRTIO_F_IN_ORDER feature has been negotiated is to write elements to
> the used/descriptor ring in-order and then update used_idx.
>
> The function iterates through the VirtQueueElement used_elems array
> in-order starting at vq->used_idx. If the element is valid (filled), the
> element is written to the used/descriptor ring. This process continues
> until we find an invalid (not filled) element.
>
> For packed VQs, the first entry (at vq->used_idx) is written to the
> descriptor ring last so the guest doesn't see any invalid descriptors.
>
> If any elements were written, the used_idx is updated.
>
> Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com>

Acked-by: Eugenio Pérez <epere...@redhat.com>

> ---
> Several fixes here for the split VQ case:
> - Ensure all previous write operations to buffers are completed before
>   updating the used_idx (via smp_wmb()).
>
> - used_elems index 'i' should be incremented by the number of descriptors
>   in the current element we just processed, not by the running total of
>   descriptors already seen. This would've caused batched operations to
>   miss ordered elements when looping through the used_elems array.
>
> - Do not keep the VQ's used_idx bound between 0 and vring.num-1 when
>   setting it via vring_used_idx_set().
>
>   While the packed VQ case naturally keeps used_idx bound between 0 and
>   vring.num-1, the split VQ case cannot. This is because used_idx is
>   used to compare the current event index with the new and old used
>   indices to decide if a notification is necessary (see
>   virtio_split_should_notify()). This comparison expects used_idx to be
>   between 0 and 65535, not 0 and vring.num-1.
>
>  hw/virtio/virtio.c | 70 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 0000a7b41c..b419d8d6e7 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1023,6 +1023,72 @@ static void virtqueue_packed_flush(VirtQueue *vq, 
> unsigned int count)
>      }
>  }
>
> +static void virtqueue_ordered_flush(VirtQueue *vq)
> +{
> +    unsigned int i = vq->used_idx % vq->vring.num;
> +    unsigned int ndescs = 0;
> +    uint16_t old = vq->used_idx;
> +    uint16_t new;
> +    bool packed;
> +    VRingUsedElem uelem;
> +
> +    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
> +
> +    if (packed) {
> +        if (unlikely(!vq->vring.desc)) {
> +            return;
> +        }
> +    } else if (unlikely(!vq->vring.used)) {
> +        return;
> +    }
> +
> +    /* First expected in-order element isn't ready, nothing to do */
> +    if (!vq->used_elems[i].in_order_filled) {
> +        return;
> +    }
> +
> +    /* Search for filled elements in-order */
> +    while (vq->used_elems[i].in_order_filled) {
> +        /*
> +         * First entry for packed VQs is written last so the guest
> +         * doesn't see invalid descriptors.
> +         */
> +        if (packed && i != vq->used_idx) {
> +            virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, 
> false);
> +        } else if (!packed) {
> +            uelem.id = vq->used_elems[i].index;
> +            uelem.len = vq->used_elems[i].len;
> +            vring_used_write(vq, &uelem, i);
> +        }
> +
> +        vq->used_elems[i].in_order_filled = false;
> +        ndescs += vq->used_elems[i].ndescs;
> +        i += vq->used_elems[i].ndescs;
> +        if (i >= vq->vring.num) {
> +            i -= vq->vring.num;
> +        }
> +    }
> +
> +    if (packed) {
> +        virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, 
> true);
> +        vq->used_idx += ndescs;
> +        if (vq->used_idx >= vq->vring.num) {
> +            vq->used_idx -= vq->vring.num;
> +            vq->used_wrap_counter ^= 1;
> +            vq->signalled_used_valid = false;
> +        }
> +    } else {
> +        /* Make sure buffer is written before we update index. */
> +        smp_wmb();
> +        new = old + ndescs;
> +        vring_used_idx_set(vq, new);
> +        if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - 
> old))) {
> +            vq->signalled_used_valid = false;
> +        }
> +    }
> +    vq->inuse -= ndescs;
> +}
> +
>  void virtqueue_flush(VirtQueue *vq, unsigned int count)
>  {
>      if (virtio_device_disabled(vq->vdev)) {
> @@ -1030,7 +1096,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
>          return;
>      }
>
> -    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
> +        virtqueue_ordered_flush(vq);
> +    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
>          virtqueue_packed_flush(vq, count);
>      } else {
>          virtqueue_split_flush(vq, count);
> --
> 2.43.5
>


Reply via email to