On Sun, Jan 18, 2026 at 9:31 AM Wafer <[email protected]> wrote:
>
>
>
> > -----Original Message-----
> > From: Eugenio Perez Martin <[email protected]>
> > Sent: January 15, 2026 18:56
> > To: Wafer <[email protected]>
> > Cc: [email protected]; [email protected]; [email protected];
> > [email protected]; [email protected]; Angus Chen
> > <[email protected]>
> > Subject: Re: [PATCH v4 4/4] vhost: SVQ add the indirect descriptors to
> > available ring
> >
> >
> > On Wed, Dec 31, 2025 at 12:01 PM Wafer Xie <[email protected]>
> > wrote:
> > >
> > > From: wafer Xie <[email protected]>
> > >
> > > Retrieve the target buffer from the indirect buffers by index, add the
> > > elements sent by the guest into the buffer’s indirect descriptors, and
> > > update freed_head and freed_number. If freed_number is zero, or if the
> > > current buffer’s freed_number is less than the number of elements,
> > > update the buffer state to SVQ_INDIRECT_BUF_FREEING.
> > >
> > > If the current indirect buffer does not have enough freed descriptors
> > > to accommodate the SVQ descriptors, descriptors can be borrowed from
> > > the next indirect buffer.
> > >
> > > Suggested-by: Eugenio Pérez <[email protected]>
> > > Signed-off-by: wafer Xie <[email protected]>
> > > ---
> > >  hw/virtio/vhost-shadow-virtqueue.c | 341 +++++++++++++++++++++++++----
> > >  1 file changed, 299 insertions(+), 42 deletions(-)
> > >
> > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > > index 4f564f514c..02a238548c 100644
> > > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > > @@ -139,86 +139,340 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> > >  }
> > >
> > > /**
> > > - * Write descriptors to SVQ vring
> > > + * Add a single descriptor to a descriptor table
> > > + *
> > > + * @desc: The descriptor to write to
> > > + * @addr: IOVA address
> > > + * @len: Length of the buffer
> > > + * @flags: Descriptor flags (VRING_DESC_F_WRITE, VRING_DESC_F_NEXT)
> > > + * @next: Next descriptor index (only used if VRING_DESC_F_NEXT is set)
> > > + */
> > > +static void vhost_svq_vring_add_desc(vring_desc_t *desc,
> > > +                                     hwaddr addr,
> > > +                                     uint32_t len,
> > > +                                     uint16_t flags,
> > > +                                     uint16_t next)
> > > +{
> > > + desc->addr = cpu_to_le64(addr);
> > > + desc->len = cpu_to_le32(len);
> > > + desc->flags = flags;
> > > + if (flags & cpu_to_le16(VRING_DESC_F_NEXT)) {
> > > + desc->next = cpu_to_le16(next);
> > > + }
> > > +}
> > > +
> > > +/**
> > > + * Write descriptors to a descriptor table (chain or indirect)
> > > *
> > > * @svq: The shadow virtqueue
> > > * @sg: Cache for hwaddr
> > > * @iovec: The iovec from the guest
> > > * @num: iovec length
> > > * @addr: Descriptors' GPAs, if backed by guest memory
> > > + * @descs: The descriptor table to write to
> > > + * @start_idx: Starting index in the descriptor table
> > > + * @offset_idx: Offset for next field calculation (used for indirect)
> > > * @more_descs: True if more descriptors come in the chain
> > > * @write: True if they are writeable descriptors
> > > + * @indirect: True if writing to indirect descriptor table
> > > *
> > > - * Return true if success, false otherwise and print error.
> > > + * Return the next free descriptor index if success, -1 on error.
> > > */
> > > -static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> > > -                                        const struct iovec *iovec, size_t num,
> > > -                                        const hwaddr *addr, bool more_descs,
> > > -                                        bool write)
> > > +static int vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq,
> > > + hwaddr *sg,
> > > + const struct iovec *iovec,
> > > + size_t num,
> > > + const hwaddr *addr,
> > > + vring_desc_t *descs,
> > > + uint16_t start_idx,
> > > + size_t offset_idx,
> > > + bool more_descs,
> > > + bool write,
> > > + bool indirect)
> > > {
> > > - uint16_t i = svq->free_head, last = svq->free_head;
> > > - unsigned n;
> > > + uint16_t i = start_idx, last = start_idx;
> > > uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> > > - vring_desc_t *descs = svq->vring.desc;
> > > bool ok;
> > >
> > > if (num == 0) {
> > > - return true;
> > > + return start_idx;
> > > }
> > >
> > > ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
> > > if (unlikely(!ok)) {
> > > - return false;
> > > + return -1;
> > > }
> > >
> > > - for (n = 0; n < num; n++) {
> > > - if (more_descs || (n + 1 < num)) {
> > > - descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> > > - descs[i].next = cpu_to_le16(svq->desc_next[i]);
> > > + for (size_t n = 0; n < num; n++) {
> > > + uint16_t desc_flags = flags;
> > > + uint16_t next;
> > > +
> > > + if (indirect) {
> > > + next = offset_idx + n + 1;
> > > } else {
> > > - descs[i].flags = flags;
> > > + next = svq->desc_next[i];
> > > }
> > > - descs[i].addr = cpu_to_le64(sg[n]);
> > > - descs[i].len = cpu_to_le32(iovec[n].iov_len);
> > >
> > > + if (more_descs || (n + 1 < num)) {
> > > + desc_flags |= cpu_to_le16(VRING_DESC_F_NEXT);
> > > + }
> > > + vhost_svq_vring_add_desc(&descs[i], sg[n],
> > > + iovec[n].iov_len, desc_flags,
> > > + next);
> > > last = i;
> > > - i = svq->desc_next[i];
> > > + if (indirect) {
> > > + i++;
> > > + } else {
> > > + i = next;
> > > + }
> > > + }
> > > +
> > > + /* Return the next free index */
> > > + if (!indirect) {
> > > + i = svq->desc_next[last];
> > > + }
> > > + return i;
> > > +}
> > > +
> > > +/**
> > > + * Add descriptors to SVQ vring using indirect descriptors (dual-buffer)
> > > + *
> > > + * @svq: The shadow virtqueue
> > > + * @out_sg: The out iovec from the guest
> > > + * @out_num: The out iovec length
> > > + * @out_addr: The out descriptors' GPAs
> > > + * @in_sg: The in iovec from the guest
> > > + * @in_num: The in iovec length
> > > + * @in_addr: The in descriptors' GPAs
> > > + * @sgs: Cache for hwaddr
> > > + * @buf_idx: Index of the indirect buffer to use
> > > + *
> > > + * Return true if success, false otherwise and print error.
> > > + */
> > > +static bool vhost_svq_add_split_indirect(VhostShadowVirtqueue *svq,
> > > + const struct iovec *out_sg,
> > > + size_t out_num,
> > > + const hwaddr *out_addr,
> > > + const struct iovec *in_sg,
> > > + size_t in_num,
> > > + const hwaddr *in_addr,
> > > +                                         hwaddr *sgs, int buf_idx)
> > > +{
> > > + SVQIndirectDescBuf *buf = &svq->indirect.bufs[buf_idx];
> > > + uint16_t start_idx = buf->start_idx + buf->freed_head;
> > > + size_t total_descs = out_num + in_num;
> > > + hwaddr indirect_iova;
> > > + int ret;
> > > +
> > > + /* Populate indirect descriptor table for out descriptors */
> > > +    ret = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
> > > +                                      svq->indirect.desc, start_idx,
> > > +                                      0, in_num > 0, false, true);
> > > + if (unlikely(ret < 0)) {
> > > + return false;
> > > + }
> > > +
> > > + /* Populate indirect descriptor table for in descriptors */
> > > +    ret = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
> > > +                                      svq->indirect.desc, start_idx + out_num,
> > > +                                      out_num, false, true, true);
> > > + if (unlikely(ret < 0)) {
> > > + return false;
> > > }
> > >
> > > - svq->free_head = svq->desc_next[last];
> > > + /* Calculate IOVA for this indirect descriptor range */
> > > + indirect_iova = svq->indirect.iova + start_idx *
> > > + sizeof(vring_desc_t);
> > > +
> > > + /* Add a single descriptor pointing to the indirect table */
> > > + svq->vring.desc[svq->free_head].addr = cpu_to_le64(indirect_iova);
> > > + svq->vring.desc[svq->free_head].len =
> > > + cpu_to_le32(total_descs * sizeof(vring_desc_t));
> > > + svq->vring.desc[svq->free_head].flags =
> > > + cpu_to_le16(VRING_DESC_F_INDIRECT);
> > > +
> > > + /* Store indirect descriptor info in desc_state */
> > > + svq->desc_state[svq->free_head].indirect_buf_idx = buf_idx;
> > > +
> > > + /* Update buffer state */
> > > + buf->freed_head += total_descs;
> > > + buf->freed_descs -= total_descs;
> > > +
> > > + /* Move free_head forward */
> > > + svq->free_head = svq->desc_next[svq->free_head];
> > > +
> > > return true;
> > > }
> > >
> > > -static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > +/**
> > > + * Try to borrow descriptors from the next buffer segment
> > > + *
> > > + * @svq: The shadow virtqueue
> > > + * @buf_idx: Current buffer index
> > > + * @needed: Number of additional descriptors needed
> > > + *
> > > + * Returns true if successfully borrowed, false otherwise.
> > > + * Note: Last buffer cannot borrow from first buffer (IOVA not contiguous).
> > > + */
> > > +static bool vhost_svq_borrow_from_next(VhostShadowVirtqueue *svq,
> > > +                                       int buf_idx, size_t needed)
> > > +{
> > > + SVQIndirectDescBuf *cur_buf = &svq->indirect.bufs[buf_idx];
> > > + SVQIndirectDescBuf *next_buf;
> > > + int next_idx;
> > > +
> > > +    /* Last buffer cannot borrow from first - IOVA would not be contiguous */
> > > + if (buf_idx == SVQ_NUM_INDIRECT_BUFS - 1) {
> > > + return false;
> > > + }
> > > +
> > > + next_idx = buf_idx + 1;
> > > + next_buf = &svq->indirect.bufs[next_idx];
> > > +
> > > +    /* Can borrow if next buffer is in FREED state and has freed_head at 0 */
> > > + if (next_buf->state != SVQ_INDIRECT_BUF_FREED ||
> > > + next_buf->freed_head != 0) {
> > > + return false;
> > > + }
> > > +
> > > + /* Check if next buffer has enough free descriptors to lend */
> > > + if (next_buf->freed_descs < needed) {
> > > + return false;
> > > + }
> > > + /* Borrow descriptors: expand current buffer, shrink next buffer */
> > > + cur_buf->num_descs += needed;
> > > + cur_buf->borrowed_descs += needed;
> > > + cur_buf->freed_descs += needed;
> > > +
> > > + next_buf->start_idx += needed;
> > > + next_buf->num_descs -= needed;
> > > + next_buf->freed_descs -= needed;
> > > +
> >
> > I don't think I understand the algorithm for indirect tables allocation and
> > freeing. Let me know what I'm missing.
> >
> > We can have at max SVQ_NUM_INDIRECT_BUFS (4) indirect tables, as the
> > information we use to locate the indirect table itself is
> > svq->desc_state[used_elem.id].indirect_buf_idx. This is used to access
> > the svq->indirect.bufs, whose size is SVQ_NUM_INDIRECT_BUFS.
> >
> > These tables could grow until the maximum descriptors allowed, which is the
> > size of the vring per VirtIO standard. The function virtqueue_pop already
> > checks this. But this code does not allow more than 4 indirect tables
> > anyway,
> > even if they were smaller than vq size and there is contiguous space in the
> > mapped buffer for the indirect tables.
> >
>
> Thank you for the detailed review! Let me clarify the design as I think
> there may be some misunderstanding.
>
> BUFFER ALLOCATION:
> ------------------
> #define CONFIG_MAX_SKB_FRAGS 17
> In the Linux kernel, virtio-net limits the maximum scatterlist size to
> 17 entries. With a vq_size of 256 and indirect descriptors enabled,
> the virtio-net driver can therefore submit up to 256 * 17
> scatter-gather entries.
>
> Based on this, the implementation allocates an indirect descriptor
> capacity of about half the theoretical maximum, specifically
> vq_size * 2 * 4 descriptors.
>
> The total allocation is:
>
> SVQ_NUM_INDIRECT_BUFS(4) * vq_size * 2 descriptors
>
> Each buf[i] has a capacity of 2*vq_size, not vq_size, so the full buffer
> is divided into 4 segments, each capable of holding 2x the vring size.
> There is no wasted space; a small sketch of this layout follows the
> diagram below.
>
> +---------------------------------------------------------------------+
> | Total: 4 bufs * 2 * vq_size = 8 * vq_size descriptors |
> +----------------+----------------+----------------+------------------+
> | buf[0] | buf[1] | buf[2] | buf[3] |
> | 2*vq_size cap | 2*vq_size cap | 2*vq_size cap | 2*vq_size cap |
> +----------------+----------------+----------------+------------------+
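>
> To make the layout concrete, here is a minimal standalone sketch (not
> patch code; the struct below only mirrors the SVQIndirectDescBuf fields
> this series uses, and the numbers assume vq_size = 256):
>
> #include <stdint.h>
> #include <stdio.h>
>
> #define SVQ_NUM_INDIRECT_BUFS 4
>
> /* Mirrors the layout-related fields of SVQIndirectDescBuf. */
> struct seg {
>     uint32_t start_idx;   /* first descriptor index in the shared table */
>     uint32_t num_descs;   /* capacity of this segment */
>     uint32_t freed_descs; /* descriptors currently free in this segment */
> };
>
> int main(void)
> {
>     const uint32_t vq_size = 256;
>     struct seg bufs[SVQ_NUM_INDIRECT_BUFS];
>
>     for (int i = 0; i < SVQ_NUM_INDIRECT_BUFS; i++) {
>         bufs[i].start_idx = i * 2 * vq_size;   /* 0, 512, 1024, 1536 */
>         bufs[i].num_descs = 2 * vq_size;       /* 512 descriptors each */
>         bufs[i].freed_descs = bufs[i].num_descs;
>         printf("buf[%d]: start %u, capacity %u\n",
>                i, bufs[i].start_idx, bufs[i].num_descs);
>     }
>     /* Total: 4 * 2 * 256 = 2048 descriptors, about half of 256 * 17. */
>     return 0;
> }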
>
> WHY 2*vq_size PER BUFFER:
> -------------------------
> The reason each buffer has 2*vq_size capacity (instead of 1*vq_size) is
> to REDUCE the frequency of borrowing scenarios and improve performance.
>
> With 2*vq_size capacity per buffer:
> - Most requests can be satisfied within a single buffer
> - Borrowing from next buffer only happens in rare edge cases
> - Better cache locality and fewer cross-buffer operations
>
> If we used 1*vq_size per buffer, borrowing would occur much more
> frequently, adding overhead to the common path.
>
> WHY BORROWING IS STILL NEEDED:
> ------------------------------
> Even with 2*vq_size capacity, the borrowing mechanism addresses the edge
> cases you mentioned in your previous review about reusing the free
> holes.
>
> https://lists.gnu.org/archive/html/qemu-devel/2025-12/msg02779.html
>
> Consider this situation:
>
> buf[0] state after heavy usage:
> +------------------------------------------------------------+
> | freed_descs = 4 (only 4 free at the tail) |
> | freeing_descs = 100 (waiting for device to return) |
> | freed_head = 508 |
> | |
> | [0.....507: IN_USE/FREEING] [508,509,510,511: FREE] |
> +------------------------------------------------------------+
>
> Now driver sends a request needing 8 descriptors:
>
> WITHOUT borrowing:
> - buf[0] has only 4 freed_descs < 8 needed
> - buf[0] transitions to FREEING state
> - Those 4 free descriptors are WASTED
> - Must use buf[1] from scratch
>
> WITH borrowing:
> - buf[0] has 4 freed_descs, needs 4 more
> - Borrow 4 from buf[1]'s head (IOVA is contiguous!)
> - Now have 8 contiguous descriptors: [508-511 from buf[0]] + [0-3 from
> buf[1]]
>
> buf[0]: buf[1]:
> +--------------------------------+ +----------------------+
> | [508,509,510,511,|0,1,2,3] | | start_idx: 4 |
> | <-- original --> <--borrowed->| | num_descs: 2*vq-4 |
> | 8 contiguous descs | | |
> +--------------------------------+ +----------------------+
> Contiguous IOVA!
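>
> Plugging the example's numbers into the IOVA computation used by
> vhost_svq_add_split_indirect() above (just an illustrative fragment;
> svq and the field names come from this series, the constants from the
> example):
>
>     /*
>      * vq_size = 256, so buf[0] covers descriptor indices 0..511.
>      * start_idx = buf[0].start_idx + buf[0].freed_head = 0 + 508.
>      * The 8-entry indirect table occupies indices 508..515: 508..511
>      * from buf[0] plus the 4 entries borrowed from buf[1], which sit
>      * directly behind them in the single mapped region, so the IOVA
>      * range stays contiguous.
>      */
>     indirect_iova = svq->indirect.iova + 508 * sizeof(vring_desc_t);
>     /* The main ring descriptor's len is 8 * sizeof(vring_desc_t). */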
>
I understand the algorithm, but I don't get what it buys. As you're
only allowing at most 4 indirect tables in flight, you are just moving
the descriptors you tag as "wasted" from the tail of the queue to the
inter-buf space. And this edge case has considerably more complexity
than the regular code path.

Let's try from a different angle: can you describe a buf[0,1,2,3]
state where forwarding an indirect table is not possible without
splitting it between many buffers?
> BORROWING RULES:
> ----------------
> To keep the design simple and predictable:
>
> 1. Each buf[i] can only borrow ONCE from buf[i+1]
> 2. After borrowing, svq->indirect.current_buf is set to buf[i+1]
> 3. Subsequent requests will use buf[i+1] as the starting point
>
> This ensures:
> - No complex multi-level borrowing chains
> - Predictable buffer rotation
> - Simple bookkeeping (only track borrowed_descs per buffer)
>
> Example flow:
>
> +-------------------------------------------------------------+
> | current_buf = 0 |
> | buf[0] needs to borrow from buf[1] |
> | -> borrow succeeds |
> | -> current_buf = 1 (next requests start from buf[1]) |
> | |
> | Later, buf[1] needs to borrow from buf[2] |
> | -> borrow succeeds |
> | -> current_buf = 2 (next requests start from buf[2]) |
> +-------------------------------------------------------------+
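>
> For reference, the pre-conditions checked before a borrow (this is just
> a condensed restatement of the checks in vhost_svq_borrow_from_next()
> from this patch, not new logic):
>
>     /* buf[buf_idx] may borrow from buf[buf_idx + 1] only if: */
>     bool can_borrow =
>         buf_idx != SVQ_NUM_INDIRECT_BUFS - 1 &&      /* not the last segment */
>         next_buf->state == SVQ_INDIRECT_BUF_FREED && /* next fully returned  */
>         next_buf->freed_head == 0 &&                 /* next still untouched,
>                                                         so its head follows
>                                                         buf[buf_idx]'s tail  */
>         next_buf->freed_descs >= needed;             /* enough descs to lend */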
>
> DESIGN SUMMARY:
> ---------------
> +-------------------------------------------------------------+
> | 2*vq_size per buffer -> Reduces borrowing frequency |
> | (performance optimization) |
> +-------------------------------------------------------------+
> | Borrowing mechanism -> Handles edge cases when buffer |
> | is almost full but not empty |
> | (avoids descriptor waste) |
> +-------------------------------------------------------------+
> | One-time borrow rule -> Simple state management |
> | (current_buf moves to next) |
> +-------------------------------------------------------------+
>
> WHY NOT FIXED CHUNKS:
> ---------------------
> Fixed vq_size chunks would waste descriptors in the scenario above.
> The 2*vq_size capacity + borrowing allows:
>
> 1. Single request can use up to 2*vq_size descs (exceeding vq_size)
> 2. Partially-used buffers can still contribute to new requests
> 3. Maximum utilization of the pre-allocated IOVA space
>
> REGARDING BUDDY ALLOCATOR:
> --------------------------
> A buddy allocator would be more flexible, but adds complexity for
> managing splits/merges. The current design with 4 large segments +
> borrowing provides a good balance:
> - Simple allocation (round-robin through 4 bufs)
> - Efficient utilization (borrowing avoids waste)
> - Predictable performance (no fragmentation)
>
> Does this clarify the design? Please let me know if you have
> further questions.
>
> Thanks,
>
> > The code allocates a buffer big enough for 8 indirect tables in
> > vhost_vdpa_svq_init_indirect, as they can only be svq->vring.num big, so
> > half of it will never be allocated under any circumstance.
> >
> > Now let's say the driver allocates 4 indirect tables (IT). The shared
> > buffer now
> > looks like:
> >
> > [
> > [1st IT],
> > [2nd IT],
> > [3rd IT],
> > [4th IT],
> > [free size for at least 4 indirect tables more] ]
> >
> > Now the device starts freeing indirect descriptors and the dance of
> > reallocating could start. The condition for borrowing and returning
> > descriptors may be met at 2*vq_size boundaries. But, by that time, it is
> > guaranteed that you can allocate an indirect table of vq_num descriptors at
> > some position N*vq_num, with N between 0 and 7.
> >
> > Why complicate the code with this dance of resizing
> > svq->indirect.bufs[j] then? Why not divide all the mapped buffer in 8
> > chunks of vq size and never move them?
> >
> > I would prefer to remove the 4 (or any number) indirect tables limitation
> > and
> > use something like the IOVA allocator or the buddy allocator to find
> > contiguous space though.
> >
> > Thanks!
> >
> > > + return true;
> > > +}
> > > +
> > > +/**
> > > + * Try to get a freed indirect buffer for use
> > > + *
> > > + * @svq: The shadow virtqueue
> > > + * @total_descs: Number of descriptors needed
> > > + *
> > > + * Returns buffer index (0 to SVQ_NUM_INDIRECT_BUFS-1)
> > > + * if available, -1 if none available.
> > > + */
> > > +static int vhost_svq_get_indirect_buf(VhostShadowVirtqueue *svq,
> > > +                                      size_t total_descs)
> > > +{
> > > + int cur = svq->indirect.current_buf;
> > > + SVQIndirectDescBuf *buf;
> > > +
> > > + if (!svq->indirect.enabled) {
> > > + return -1;
> > > + }
> > > +
> > > + if ( cur < 0) {
> > > + cur = 0;
> > > + }
> > > + /* Start from current or first buffer, try all buffers in order */
> > > + for (int i = 0; i < SVQ_NUM_INDIRECT_BUFS; i++) {
> > > + int idx = (cur + i) % SVQ_NUM_INDIRECT_BUFS;
> > > + buf = &svq->indirect.bufs[idx];
> > > +
> > > + if (buf->state != SVQ_INDIRECT_BUF_FREED) {
> > > + continue;
> > > + }
> > > +
> > > + /* Check if we have enough free descriptors */
> > > + if (buf->freed_descs >= total_descs) {
> > > + svq->indirect.current_buf = idx;
> > > + return idx;
> > > + }
> > > +
> > > + /* Try to borrow from next buffer */
> > > + size_t needed = total_descs - buf->freed_descs;
> > > + if ((buf->freed_descs > 0) &&
> > > + vhost_svq_borrow_from_next(svq, idx, needed)) {
> > > + svq->indirect.current_buf = idx + 1;
> > > + return idx;
> > > + }
> > > +
> > > +        /* Not enough space, mark as FREEING if it's the current buffer */
> > > + buf->state = SVQ_INDIRECT_BUF_FREEING;
> > > + }
> > > +
> > > + /* All buffers unavailable, fallback to chain mode */
> > > + return -1;
> > > +}
> > > +
> > > +static int vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > >                                 const struct iovec *out_sg, size_t out_num,
> > > const hwaddr *out_addr,
> > > const struct iovec *in_sg, size_t in_num,
> > > - const hwaddr *in_addr, unsigned *head)
> > > + const hwaddr *in_addr, unsigned *head,
> > > + bool *used_indirect)
> > > {
> > > unsigned avail_idx;
> > > vring_avail_t *avail = svq->vring.avail;
> > > bool ok;
> > > + int ret;
> > > g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
> > > + size_t total_descs = out_num + in_num;
> > > + int indirect_buf_idx = -1;
> > >
> > > *head = svq->free_head;
> > > + *used_indirect = false;
> > >
> > > /* We need some descriptors here */
> > > if (unlikely(!out_num && !in_num)) {
> > > qemu_log_mask(LOG_GUEST_ERROR,
> > > "Guest provided element with no descriptors");
> > > - return false;
> > > + return -EINVAL;
> > > }
> > >
> > > - ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
> > > - in_num > 0, false);
> > > - if (unlikely(!ok)) {
> > > - return false;
> > > +    /* Try to use indirect descriptors if feature is negotiated and
> > > +     * total > 1 */
> > > +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
> > > + total_descs > 1) {
> > > + indirect_buf_idx = vhost_svq_get_indirect_buf(svq,
> > > + total_descs);
> > > }
> > >
> > > -    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
> > > -                                     true);
> > > - if (unlikely(!ok)) {
> > > - return false;
> > > + if (indirect_buf_idx >= 0) {
> > > + /* Indirect mode: only need 1 main descriptor slot */
> > > + if (unlikely(vhost_svq_available_slots(svq) < 1)) {
> > > + return -ENOSPC;
> > > + }
> > > +
> > > + /* Use indirect mode */
> > > + ok = vhost_svq_add_split_indirect(svq, out_sg, out_num, out_addr,
> > > + in_sg, in_num, in_addr,
> > > + sgs, indirect_buf_idx);
> > > + if (unlikely(!ok)) {
> > > + error_report("indirect error, out_num %zu in_num %zu "
> > > + "avail index %u head %u",
> > > + out_num, in_num, svq->shadow_avail_idx, *head);
> > > + return -EINVAL;
> > > + }
> > > + *used_indirect = true;
> > > + } else {
> > > + /* Chain mode: need total_descs descriptor slots */
> > > + if (unlikely(vhost_svq_available_slots(svq) < total_descs)) {
> > > + return -ENOSPC;
> > > + }
> > > +
> > > + /* Use direct (chain) mode */
> > > + svq->desc_state[svq->free_head].indirect_buf_idx = -1;
> > > +
> > > +        ret = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
> > > +                                          svq->vring.desc, svq->free_head, 0,
> > > +                                          in_num > 0, false, false);
> > > + if (unlikely(ret < 0)) {
> > > + return -EINVAL;
> > > + }
> > > + svq->free_head = ret;
> > > +
> > > +        ret = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr,
> > > +                                          svq->vring.desc, svq->free_head, 0,
> > > +                                          false, true, false);
> > > + if (unlikely(ret < 0)) {
> > > + return -EINVAL;
> > > + }
> > > + svq->free_head = ret;
> > > }
> > >
> > > /*
> > > @@ -233,7 +487,7 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > smp_wmb();
> > > avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> > >
> > > - return true;
> > > + return 0;
> > > }
> > >
> > >  static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> > > @@ -249,7 +503,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> > > if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> > > uint16_t avail_event = le16_to_cpu(
> > > *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> > > -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> > > + needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> > > + svq->shadow_avail_idx - 1);
> > > } else {
> > > needs_kick =
> > > !(svq->vring.used->flags &
> > > cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> > > @@ -274,19 +529,21 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> > >  {
> > > unsigned qemu_head;
> > > unsigned ndescs = in_num + out_num;
> > > - bool ok;
> > > + int r;
> > > + bool used_indirect = false;
> > >
> > > - if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
> > > - return -ENOSPC;
> > > +    r = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
> > > + in_addr, &qemu_head, &used_indirect);
> > > + if (unlikely(r != 0)) {
> > > + return r;
> > > }
> > >
> > > -    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
> > > - in_addr, &qemu_head);
> > > - if (unlikely(!ok)) {
> > > - return -EINVAL;
> > > +    /* If using indirect, only 1 main descriptor is used; otherwise ndescs */
> > > + if (used_indirect) {
> > > + svq->num_free -= 1;
> > > + } else {
> > > + svq->num_free -= ndescs;
> > > }
> > > -
> > > - svq->num_free -= ndescs;
> > > svq->desc_state[qemu_head].elem = elem;
> > > svq->desc_state[qemu_head].ndescs = ndescs;
> > > vhost_svq_kick(svq);
> > > --
> > > 2.34.1
> > >
>
>