From: wafer Xie <[email protected]>

Retrieve the target buffer from the indirect buffer array by index,
write the descriptors of the element sent by the guest into the
buffer's indirect descriptor table, and update freed_head and
freed_descs. If freed_descs reaches zero, or if the buffer's remaining
freed_descs are fewer than the element's descriptors, set the buffer
state to SVQ_INDIRECT_BUF_FREEING.

If the current indirect buffer does not have enough freed descriptors
to hold all of the SVQ descriptors of an element, descriptors can be
borrowed from the next indirect buffer, since adjacent buffers are
IOVA-contiguous; see the worked example below.
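
As a worked example (illustrative numbers only), take two adjacent
256-descriptor buffers where buffer 0 has 3 free slots left and the
element needs 5:

    /* before: buf0 { start_idx 0,   num_descs 256, freed_descs 3   }
     *         buf1 { start_idx 256, num_descs 256, freed_descs 256 }
     *
     * buf0 borrows needed = 5 - 3 = 2 descriptors from buf1, which
     * must be in the FREED state with freed_head == 0:
     *
     * after:  buf0 { start_idx 0,   num_descs 258, freed_descs 5,
     *                borrowed_descs 2 }
     *         buf1 { start_idx 258, num_descs 254, freed_descs 254 }
     *
     * Both regions live in one IOVA-contiguous allocation, so the five
     * descriptors still form a single indirect table reachable from one
     * VRING_DESC_F_INDIRECT entry in the main ring.
     */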

Suggested-by: Eugenio Pérez <[email protected]>
Signed-off-by: wafer Xie <[email protected]>
---
 hw/virtio/vhost-shadow-virtqueue.c | 341 +++++++++++++++++++++++++----
 1 file changed, 299 insertions(+), 42 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 4f564f514c..02a238548c 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -139,86 +139,340 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
 }
 
 /**
- * Write descriptors to SVQ vring
+ * Add a single descriptor to a descriptor table
+ *
+ * @desc: The descriptor to write to
+ * @addr: IOVA address
+ * @len: Length of the buffer
+ * @flags: Descriptor flags (VRING_DESC_F_WRITE, VRING_DESC_F_NEXT)
+ * @next: Next descriptor index (only used if VRING_DESC_F_NEXT is set)
+ */
+static void vhost_svq_vring_add_desc(vring_desc_t *desc,
+                                     hwaddr addr,
+                                     uint32_t len,
+                                     uint16_t flags,
+                                     uint16_t next)
+{
+    desc->addr = cpu_to_le64(addr);
+    desc->len = cpu_to_le32(len);
+    desc->flags = flags;
+    if (flags & cpu_to_le16(VRING_DESC_F_NEXT)) {
+        desc->next = cpu_to_le16(next);
+    }
+}
+
+/**
+ * Write descriptors to a descriptor table (chain or indirect)
  *
  * @svq: The shadow virtqueue
  * @sg: Cache for hwaddr
  * @iovec: The iovec from the guest
  * @num: iovec length
  * @addr: Descriptors' GPAs, if backed by guest memory
+ * @descs: The descriptor table to write to
+ * @start_idx: Starting index in the descriptor table
+ * @offset_idx: Offset used to compute the next field (indirect tables only)
  * @more_descs: True if more descriptors come in the chain
  * @write: True if they are writeable descriptors
+ * @indirect: True if writing to indirect descriptor table
  *
- * Return true if success, false otherwise and print error.
+ * Return the next free descriptor index on success, -1 on error.
  */
-static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-                                        const struct iovec *iovec, size_t num,
-                                        const hwaddr *addr, bool more_descs,
-                                        bool write)
+static int vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq,
+                                       hwaddr *sg,
+                                       const struct iovec *iovec,
+                                       size_t num,
+                                       const hwaddr *addr,
+                                       vring_desc_t *descs,
+                                       uint16_t start_idx,
+                                       size_t offset_idx,
+                                       bool more_descs,
+                                       bool write,
+                                       bool indirect)
 {
-    uint16_t i = svq->free_head, last = svq->free_head;
-    unsigned n;
+    uint16_t i = start_idx, last = start_idx;
     uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
-    vring_desc_t *descs = svq->vring.desc;
     bool ok;
 
     if (num == 0) {
-        return true;
+        return start_idx;
     }
 
     ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
     if (unlikely(!ok)) {
-        return false;
+        return -1;
     }
 
-    for (n = 0; n < num; n++) {
-        if (more_descs || (n + 1 < num)) {
-            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
-            descs[i].next = cpu_to_le16(svq->desc_next[i]);
+    for (size_t n = 0; n < num; n++) {
+        uint16_t desc_flags = flags;
+        uint16_t next;
+
+        if (indirect) {
+            next = offset_idx + n + 1;
         } else {
-            descs[i].flags = flags;
+            next = svq->desc_next[i];
         }
-        descs[i].addr = cpu_to_le64(sg[n]);
-        descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
+        if (more_descs || (n + 1 < num)) {
+            desc_flags |= cpu_to_le16(VRING_DESC_F_NEXT);
+        }
+        vhost_svq_vring_add_desc(&descs[i], sg[n],
+                                 iovec[n].iov_len, desc_flags, next);
         last = i;
-        i = svq->desc_next[i];
+        if (indirect) {
+            i++;
+        } else {
+            i = next;
+        }
+    }
+
+    /* Return the next free index */
+    if (!indirect) {
+        i = svq->desc_next[last];
+    }
+    return i;
+}
+
+/**
+ * Add descriptors to SVQ vring using indirect descriptors (dual-buffer)
+ *
+ * @svq: The shadow virtqueue
+ * @out_sg: The out iovec from the guest
+ * @out_num: The out iovec length
+ * @out_addr: The out descriptors' GPAs
+ * @in_sg: The in iovec from the guest
+ * @in_num: The in iovec length
+ * @in_addr: The in descriptors' GPAs
+ * @sgs: Cache for hwaddr
+ * @buf_idx: Index of the indirect buffer to use
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_add_split_indirect(VhostShadowVirtqueue *svq,
+                                         const struct iovec *out_sg,
+                                         size_t out_num,
+                                         const hwaddr *out_addr,
+                                         const struct iovec *in_sg,
+                                         size_t in_num,
+                                         const hwaddr *in_addr,
+                                         hwaddr *sgs, int buf_idx)
+{
+    SVQIndirectDescBuf *buf = &svq->indirect.bufs[buf_idx];
+    uint16_t start_idx = buf->start_idx + buf->freed_head;
+    size_t total_descs = out_num + in_num;
+    hwaddr indirect_iova;
+    int ret;
+
+    /* Populate indirect descriptor table for out descriptors */
+    ret = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+                                      svq->indirect.desc, start_idx,
+                                      0, in_num > 0, false, true);
+    if (unlikely(ret < 0)) {
+        return false;
+    }
+
+    /* Populate indirect descriptor table for in descriptors */
+    ret = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
+                                      svq->indirect.desc, start_idx + out_num,
+                                      out_num, false, true, true);
+    if (unlikely(ret < 0)) {
+        return false;
     }
 
-    svq->free_head = svq->desc_next[last];
+    /* Calculate IOVA for this indirect descriptor range */
+    indirect_iova = svq->indirect.iova + start_idx * sizeof(vring_desc_t);
+
+    /* Add a single descriptor pointing to the indirect table */
+    svq->vring.desc[svq->free_head].addr = cpu_to_le64(indirect_iova);
+    svq->vring.desc[svq->free_head].len =
+            cpu_to_le32(total_descs * sizeof(vring_desc_t));
+    svq->vring.desc[svq->free_head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT);
+
+    /* Store indirect descriptor info in desc_state */
+    svq->desc_state[svq->free_head].indirect_buf_idx = buf_idx;
+
+    /* Update buffer state */
+    buf->freed_head += total_descs;
+    buf->freed_descs -= total_descs;
+
+    /* Move free_head forward */
+    svq->free_head = svq->desc_next[svq->free_head];
+
     return true;
 }
 
-static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+/**
+ * Try to borrow descriptors from the next buffer segment
+ *
+ * @svq: The shadow virtqueue
+ * @buf_idx: Current buffer index
+ * @needed: Number of additional descriptors needed
+ *
+ * Returns true if successfully borrowed, false otherwise.
+ * Note: The last buffer cannot borrow from the first (IOVA not contiguous).
+ */
+static bool vhost_svq_borrow_from_next(VhostShadowVirtqueue *svq,
+                                       int buf_idx, size_t needed)
+{
+    SVQIndirectDescBuf *cur_buf = &svq->indirect.bufs[buf_idx];
+    SVQIndirectDescBuf *next_buf;
+    int next_idx;
+
+    /* Last buffer cannot borrow from first - IOVA would not be contiguous */
+    if (buf_idx == SVQ_NUM_INDIRECT_BUFS - 1) {
+        return false;
+    }
+
+    next_idx = buf_idx + 1;
+    next_buf = &svq->indirect.bufs[next_idx];
+
+    /* Can borrow if next buffer is in FREED state and has freed_head at 0 */
+    if (next_buf->state != SVQ_INDIRECT_BUF_FREED ||
+        next_buf->freed_head != 0) {
+        return false;
+    }
+
+    /* Check if next buffer has enough free descriptors to lend */
+    if (next_buf->freed_descs < needed) {
+        return false;
+    }
+    /* Borrow descriptors: expand current buffer, shrink next buffer */
+    cur_buf->num_descs += needed;
+    cur_buf->borrowed_descs += needed;
+    cur_buf->freed_descs += needed;
+
+    next_buf->start_idx += needed;
+    next_buf->num_descs -= needed;
+    next_buf->freed_descs -= needed;
+
+    return true;
+}
+
+/**
+ * Try to get a freed indirect buffer for use
+ *
+ * @svq: The shadow virtqueue
+ * @total_descs: Number of descriptors needed
+ *
+ * Returns the buffer index (0 to SVQ_NUM_INDIRECT_BUFS - 1) on success,
+ * -1 if no buffer is available.
+ */
+static int vhost_svq_get_indirect_buf(VhostShadowVirtqueue *svq,
+                                      size_t total_descs)
+{
+    int cur = svq->indirect.current_buf;
+    SVQIndirectDescBuf *buf;
+
+    if (!svq->indirect.enabled) {
+        return -1;
+    }
+
+    if (cur < 0) {
+        cur = 0;
+    }
+    /* Start from current or first buffer, try all buffers in order */
+    for (int i = 0; i < SVQ_NUM_INDIRECT_BUFS; i++) {
+        int idx = (cur + i) % SVQ_NUM_INDIRECT_BUFS;
+        buf = &svq->indirect.bufs[idx];
+
+        if (buf->state != SVQ_INDIRECT_BUF_FREED) {
+            continue;
+        }
+
+        /* Check if we have enough free descriptors */
+        if (buf->freed_descs >= total_descs) {
+            svq->indirect.current_buf = idx;
+            return idx;
+        }
+
+        /* Try to borrow from next buffer */
+        size_t needed = total_descs - buf->freed_descs;
+        if ((buf->freed_descs > 0) &&
+            vhost_svq_borrow_from_next(svq, idx, needed)) {
+            svq->indirect.current_buf = idx + 1;
+            return idx;
+        }
+
+        /* Not enough space; mark FREEING until its descriptors return */
+        buf->state = SVQ_INDIRECT_BUF_FREEING;
+    }
+
+    /* All buffers unavailable, fall back to chain mode */
+    return -1;
+}
+
+static int vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                 const struct iovec *out_sg, size_t out_num,
                                 const hwaddr *out_addr,
                                 const struct iovec *in_sg, size_t in_num,
-                                const hwaddr *in_addr, unsigned *head)
+                                const hwaddr *in_addr, unsigned *head,
+                                bool *used_indirect)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
     bool ok;
+    int ret;
     g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
+    size_t total_descs = out_num + in_num;
+    int indirect_buf_idx = -1;
 
     *head = svq->free_head;
+    *used_indirect = false;
 
     /* We need some descriptors here */
     if (unlikely(!out_num && !in_num)) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Guest provided element with no descriptors");
-        return false;
+        return -EINVAL;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
-                                     in_num > 0, false);
-    if (unlikely(!ok)) {
-        return false;
+    /* Try to use indirect descriptors if feature is negotiated and total > 1 */
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+        total_descs > 1) {
+        indirect_buf_idx = vhost_svq_get_indirect_buf(svq, total_descs);
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
-                                     true);
-    if (unlikely(!ok)) {
-        return false;
+    if (indirect_buf_idx >= 0) {
+        /* Indirect mode: only need 1 main descriptor slot */
+        if (unlikely(vhost_svq_available_slots(svq) < 1)) {
+            return -ENOSPC;
+        }
+
+        /* Use indirect mode */
+        ok = vhost_svq_add_split_indirect(svq, out_sg, out_num, out_addr,
+                                          in_sg, in_num, in_addr,
+                                          sgs, indirect_buf_idx);
+        if (unlikely(!ok)) {
+            error_report("indirect error, out_num %zu in_num %zu "
+                         "avail index %u head %u",
+                         out_num, in_num, svq->shadow_avail_idx, *head);
+            return -EINVAL;
+        }
+        *used_indirect = true;
+    } else {
+        /* Chain mode: need total_descs descriptor slots */
+        if (unlikely(vhost_svq_available_slots(svq) < total_descs)) {
+            return -ENOSPC;
+        }
+
+        /* Use direct (chain) mode */
+        svq->desc_state[svq->free_head].indirect_buf_idx = -1;
+
+        ret = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+                                          svq->vring.desc, svq->free_head, 0,
+                                          in_num > 0, false, false);
+        if (unlikely(ret < 0)) {
+            return -EINVAL;
+        }
+        svq->free_head = ret;
+
+        ret = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
+                                          svq->vring.desc, svq->free_head, 0,
+                                          false, true, false);
+        if (unlikely(ret < 0)) {
+            return -EINVAL;
+        }
+        svq->free_head = ret;
     }
 
     /*
@@ -233,7 +487,7 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
     smp_wmb();
     avail->idx = cpu_to_le16(svq->shadow_avail_idx);
 
-    return true;
+    return 0;
 }
 
 static void vhost_svq_kick(VhostShadowVirtqueue *svq)
@@ -249,7 +503,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         uint16_t avail_event = le16_to_cpu(
                 *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
-        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
+        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
+                                      svq->shadow_avail_idx - 1);
     } else {
         needs_kick =
                 !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
@@ -274,19 +529,21 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 {
     unsigned qemu_head;
     unsigned ndescs = in_num + out_num;
-    bool ok;
+    int r;
+    bool used_indirect = false;
 
-    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
-        return -ENOSPC;
+    r = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
+                             in_addr, &qemu_head, &used_indirect);
+    if (unlikely(r != 0)) {
+        return r;
     }
 
-    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
-                             in_addr, &qemu_head);
-    if (unlikely(!ok)) {
-        return -EINVAL;
+    /* If using indirect, only 1 main descriptor is used; otherwise ndescs */
+    if (used_indirect) {
+        svq->num_free -= 1;
+    } else {
+        svq->num_free -= ndescs;
     }
-
-    svq->num_free -= ndescs;
     svq->desc_state[qemu_head].elem = elem;
     svq->desc_state[qemu_head].ndescs = ndescs;
     vhost_svq_kick(svq);
-- 
2.34.1

