> -----Original Message-----
> From: Eugenio Perez Martin <[email protected]>
> Sent: December 4, 2025 16:41
> To: Wafer <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Angus Chen
> <[email protected]>
> Subject: Re: [PATCH 2/4] vdpa: implement the interfaces alloc/free for
> indirect desc
> 
> On Thu, Dec 4, 2025 at 8:39 AM Wafer Xie <[email protected]> wrote:
> >
> > Dynamically allocate/free indirect descriptor tables.
> >
> > Signed-off-by: Wafer Xie <[email protected]>
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.c |   5 +-
> >  hw/virtio/vhost-shadow-virtqueue.h |  49 ++++++++++-
> >  hw/virtio/vhost-vdpa.c             | 130 ++++++++++++++++++++++++++++-
> >  3 files changed, 180 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index 2481d49345..ecc3245138 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -756,15 +756,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >   *
> >   * @ops: SVQ owner callbacks
> >   * @ops_opaque: ops opaque pointer
> > + * @indirect_ops: Indirect descriptor operations
> >   */
> >  VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> > -                                    void *ops_opaque)
> > +                                    void *ops_opaque,
> > +                                    SVQIndirectOps *indirect_ops)
> >  {
> >      VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
> >
> >      event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
> >      svq->ops = ops;
> >      svq->ops_opaque = ops_opaque;
> > +    svq->indirect_ops = indirect_ops;
> >      return svq;
> >  }
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > index 2b36df4dd5..c5d654eae8 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -54,6 +54,49 @@ typedef struct VhostShadowVirtqueueOps {
> >      VirtQueueAvailCallback avail_handler;
> >  } VhostShadowVirtqueueOps;
> >
> > +/**
> > + * Callback to allocate indirect descriptor table.
> > + *
> > + * @svq: Shadow virtqueue
> > + * @num: Number of descriptors
> > + * @desc: Output pointer to allocated descriptor table
> > + * @iova: Output IOVA of the allocated table
> > + * @size: Output size of the allocated region
> > + * @opaque: Opaque data (vhost_vdpa pointer)
> > + *
> > + * Returns true on success, false on failure.
> > + */
> > +typedef bool (*SVQIndirectDescAlloc)(VhostShadowVirtqueue *svq,
> > +                                     size_t num,
> > +                                     vring_desc_t **desc,
> > +                                     hwaddr *iova,
> > +                                     size_t *size,
> > +                                     void *opaque);
> > +
> > +/**
> > + * Callback to free indirect descriptor table.
> > + *
> > + * @svq: Shadow virtqueue
> > + * @desc: Descriptor table to free
> > + * @iova: IOVA of the table
> > + * @size: Size of the allocated region
> > + * @opaque: Opaque data (vhost_vdpa pointer)
> > + */
> > +typedef void (*SVQIndirectDescFree)(VhostShadowVirtqueue *svq,
> > +                                    vring_desc_t *desc,
> > +                                    hwaddr iova,
> > +                                    size_t size,
> > +                                    void *opaque);
> > +
> > +/**
> > + * Indirect descriptor operations and context
> > + */
> > +typedef struct SVQIndirectOps {
> > +    SVQIndirectDescAlloc alloc;
> > +    SVQIndirectDescFree free;
> > +    void *opaque;  /* Pointer to struct vhost_vdpa */
> > +} SVQIndirectOps;
> > +
> >  /* Shadow virtqueue to relay notifications */
> >  typedef struct VhostShadowVirtqueue {
> >      /* Shadow vring */
> > @@ -104,6 +147,9 @@ typedef struct VhostShadowVirtqueue {
> >      /* Caller callbacks opaque */
> >      void *ops_opaque;
> >
> > +    /* Indirect descriptor operations */
> > +    SVQIndirectOps *indirect_ops;
> > +
> >      /* Next head to expose to the device */
> >      uint16_t shadow_avail_idx;
> >
> > @@ -143,7 +189,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> >  void vhost_svq_stop(VhostShadowVirtqueue *svq);
> >
> >  VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
> > -                                    void *ops_opaque);
> > +                                    void *ops_opaque,
> > +                                    SVQIndirectOps *indirect_ops);
> >
> >  void vhost_svq_free(gpointer vq);
> >  G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 7061b6e1a3..1719993f52 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -584,15 +584,130 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
> >      return ret;
> >  }
> >
> > +/**
> > + * Allocate indirect descriptor table for SVQ
> > + *
> > + * @svq: Shadow virtqueue
> > + * @num: Number of descriptors needed
> > + * @desc: Output pointer to the allocated table
> > + * @iova: Output IOVA for the table
> > + * @size: Output size of the allocated region
> > + * @opaque: Opaque pointer (vhost_vdpa instance)
> > + *
> > + * Returns true on success, false on failure.
> > + */
> > +static bool vhost_vdpa_svq_indirect_desc_alloc(VhostShadowVirtqueue *svq,
> > +                                                size_t num,
> > +                                                vring_desc_t **desc,
> > +                                                hwaddr *iova,
> > +                                                size_t *size,
> > +                                                void *opaque)
> > +{
> > +    struct vhost_vdpa *v = opaque;
> > +    size_t desc_size = sizeof(vring_desc_t) * num;
> > +    size_t alloc_size = ROUND_UP(desc_size, qemu_real_host_page_size());
> > +    DMAMap needle = {
> > +        .size = alloc_size - 1,
> > +        .perm = IOMMU_RO,
> > +    };
> > +    vring_desc_t *indirect_desc;
> > +    int r;
> > +
> > +    indirect_desc = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
> > +                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> > +    if (indirect_desc == MAP_FAILED) {
> > +        error_report("Cannot allocate indirect descriptor table");
> > +        return false;
> > +    }
> > +
> > +    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &needle,
> > +                                  (hwaddr)(uintptr_t)indirect_desc);
> > +    if (unlikely(r != IOVA_OK)) {
> > +        error_report("Cannot allocate iova for indirect descriptors (%d)", 
> > r);
> > +        goto err_iova_alloc;
> > +    }
> > +
> > +    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle.iova,
> > +                           alloc_size, indirect_desc, true);
> 
> Thanks for moving this forward! But the map and unmap operations could be
> heavy in vDPA. My idea was to use a statically allocated buffer for the
> indirect table first, but then we need to agree on the right size for it, etc.
> 
Yes. Since virtio indirect descriptors require a contiguous descriptor array
and the table size is not fixed, a pool doesn't really fit. I'm planning to
implement this with a dual-array approach instead; the updated
patch will be ready soon.
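
To make that concrete, here is a rough sketch of the shape I have in
mind; every name and size below is a placeholder, not final code. Each
SVQ would own two fixed-size descriptor tables that are mmap()ed and
DMA-mapped once at setup, so the data path only picks a free slot
instead of paying for a map/unmap per request:

    /* Sketch only: illustrative fields, not the final layout */
    typedef struct SVQIndirectSlot {
        vring_desc_t *desc;   /* host VA, mapped once at SVQ start */
        hwaddr iova;          /* IOVA assigned at map time */
        size_t num;           /* capacity in descriptors */
        bool in_use;          /* owned by an in-flight request */
    } SVQIndirectSlot;

    /* Two pre-mapped tables per SVQ; requests that do not fit, or that
     * arrive while both slots are busy, would fall back to chained
     * (split) descriptors. */
    SVQIndirectSlot indirect[2];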

> Can you profile the impact of it vs. converting them to chained descriptors?
> If we find it improves performance, it would be a great addition.
> 
Regarding the performance impact and the possibility of switching to chained
descriptors: I will include benchmark results comparing chained and indirect
descriptors in the next version of the patch.

> > +    if (unlikely(r != 0)) {
> > +        error_report("Cannot map indirect descriptors to device: %s (%d)",
> > +                     g_strerror(-r), -r);
> > +        goto err_dma_map;
> > +    }
> > +
> > +    *desc = indirect_desc;
> > +    *iova = needle.iova;
> > +    *size = alloc_size;
> > +    return true;
> > +
> > +err_dma_map:
> > +    vhost_iova_tree_remove(v->shared->iova_tree, needle);
> > +err_iova_alloc:
> > +    munmap(indirect_desc, alloc_size);
> > +    return false;
> > +}
> > +
> > +/**
> > + * Free indirect descriptor table for SVQ
> > + *
> > + * @svq: Shadow virtqueue
> > + * @desc: Descriptor table to free
> > + * @iova: IOVA of the table
> > + * @size: Size of the allocated region
> > + * @opaque: Opaque pointer (vhost_vdpa instance)
> > + */
> > +static void vhost_vdpa_svq_indirect_desc_free(VhostShadowVirtqueue *svq,
> > +                                               vring_desc_t *desc,
> > +                                               hwaddr iova,
> > +                                               size_t size,
> > +                                               void *opaque)
> > +{
> > +    struct vhost_vdpa *v = opaque;
> > +    const DMAMap needle = {
> > +        .translated_addr = (hwaddr)(uintptr_t)desc,
> > +    };
> > +    const DMAMap *result;
> > +    int r;
> > +
> > +    /* Find the mapping in the IOVA tree by HVA */
> > +    result = vhost_iova_tree_find_iova(v->shared->iova_tree, &needle);
> > +    if (unlikely(!result)) {
> > +        error_report("Cannot find indirect descriptor mapping to unmap: "
> > +                     "iova=0x%" HWADDR_PRIx " hva=0x%" HWADDR_PRIx " size=%zu",
> > +                     iova, needle.translated_addr, size);
> > +        return;
> > +    }
> > +
> > +    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
> > +                             size);
> > +    if (unlikely(r != 0)) {
> > +        error_report("Cannot unmap indirect descriptors: "
> > +                     "iova=0x%" HWADDR_PRIx " size=%zu: %s (%d)",
> > +                     result->iova, size, g_strerror(-r), -r);
> > +    }
> > +
> > +    vhost_iova_tree_remove(v->shared->iova_tree, *result);
> > +
> > +    munmap(desc, size);
> > +}
> > +
> >  static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
> >  {
> >      g_autoptr(GPtrArray) shadow_vqs = NULL;
> > +    SVQIndirectOps *indirect_ops;
> > +
> > +    /* Create indirect descriptor ops */
> > +    indirect_ops = g_new0(SVQIndirectOps, 1);
> > +    indirect_ops->alloc = vhost_vdpa_svq_indirect_desc_alloc;
> > +    indirect_ops->free = vhost_vdpa_svq_indirect_desc_free;
> > +    indirect_ops->opaque = v;
> >
> >      shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
> >      for (unsigned n = 0; n < hdev->nvqs; ++n) {
> >          VhostShadowVirtqueue *svq;
> >
> > -        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
> > +        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque,
> > +                            indirect_ops);
> >          g_ptr_array_add(shadow_vqs, svq);
> >      }
> >
> > @@ -789,10 +904,21 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
> >  {
> >      struct vhost_vdpa *v = dev->opaque;
> >      size_t idx;
> > +    SVQIndirectOps *indirect_ops = NULL;
> >
> >      for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
> > -        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
> > +        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, idx);
> > +
> > +        /* Save indirect_ops pointer from first SVQ (all share the same one) */
> 
> Why not put it in the shared struct, then?
> 
> But I don't really get why they are callbacks. What's the issue with
> if (indirect) { call to indirect functions }?
> 
The next version of the patch will use a statically allocated buffer, so no
callbacks will be needed.
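
With statically allocated tables owned by the SVQ, the dispatch can
indeed become a plain branch instead of going through function
pointers. Roughly like this (the names ndescs, desc_indirect, and
vhost_svq_add_indirect are illustrative placeholders, not existing
code):

    /* Sketch: choose indirect vs. chained on the data path;
     * ndescs is the number of buffers in the request. */
    if (svq->desc_indirect != NULL && ndescs > 1) {
        ok = vhost_svq_add_indirect(svq, out_sg, out_num, in_sg, in_num);
    } else {
        ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
                                 &qemu_head);
    }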

> > +        if (idx == 0 && svq->indirect_ops) {
> > +            indirect_ops = svq->indirect_ops;
> > +        }
> > +
> > +        vhost_svq_stop(svq);
> >      }
> > +
> > +    g_free(indirect_ops);
> > +
> >      g_ptr_array_free(v->shadow_vqs, true);
> >  }
> >
> > --
> > 2.34.1
> >
