On Wed, May 17, 2023 at 11:02 PM Hawkins Jiawei <yin31...@gmail.com> wrote:
>
> Sorry for forgetting to cc when replying to the email.
>
> On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <epere...@redhat.com> 
> wrote:
> >
> > On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasow...@redhat.com> wrote:
> > >
> > > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31...@gmail.com> wrote:
> > > >
> > > > This patch introduces vhost_vdpa_net_cvq_add() and
> > > > refactors vhost_vdpa_net_load*() so that QEMU can
> > > > send CVQ state load commands in parallel.
> > > >
> > > > To be more specific, this patch introduces vhost_vdpa_net_cvq_add()
> > > > to add SVQ control commands to SVQ and kick the device without
> > > > polling the device used buffers. QEMU will not poll and check the
> > > > device used buffers in vhost_vdpa_net_load() until all CVQ state
> > > > load commands have been sent to the device.
> > > >
> > > > What's more, to avoid overwriting the single `s->cvq_cmd_out_buffer`
> > > > and `s->status` buffers when sending CVQ state load commands
> > > > in parallel, this patch introduces `out_cursor` and `in_cursor`
> > > > in vhost_vdpa_net_load(), pointing to the next available out and
> > > > in buffers, so that each CVQ state load command can use its own
> > > > unique buffer.
> > > >
> > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
> > > > Signed-off-by: Hawkins Jiawei <yin31...@gmail.com>
> > > > ---
> > > >  net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++----------
> > > >  1 file changed, 120 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index 10804c7200..14e31ca5c5 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> > > >      vhost_vdpa_net_client_stop(nc);
> > > >  }
> > > >
> > > > +/**
> > > > + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
> > > > + * kicks the device but does not poll the device used buffers.
> > > > + *
> > > > + * Return the number of elements added to SVQ on success.
> > > > + */
> > > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > > > +                                void **out_cursor, size_t out_len,
> > >
> > > Can we track things like cursors in e.g VhostVDPAState ?
> > >
> >
> > Cursors will only be used at device startup. After that, cursors are
> > always the full buffer. Do we want to "pollute" VhostVDPAState with
> > things that will not be used after startup?

So it's the cursor of cvq_cmd_out_buffer; it would be more elegant to
keep it where cvq_cmd_out_buffer itself is placed. That avoids passing
cursors down through several levels of calls.
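
To illustrate, a rough (untested) sketch, with only the relevant fields
shown and the cursor field names made up:

struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    /* Existing CVQ buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;
    /* New: next free slot in each buffer, only meaningful during load */
    void *cvq_cmd_out_cursor;
    virtio_net_ctrl_ack *cvq_in_cursor;
};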

Or it would be even better to have some buffer allocation helpers to
allocate and free the in and out buffers.
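
Again just an untested sketch under the same assumptions (the helper
names and cursor fields are made up; vhost_vdpa_net_cvq_cmd_page_len()
bounds the out buffer as in the patch):

static void *vhost_vdpa_net_cvq_out_alloc(VhostVDPAState *s, size_t len)
{
    /* Refuse the allocation if the command does not fit in the page */
    if (len > vhost_vdpa_net_cvq_cmd_page_len() -
              (s->cvq_cmd_out_cursor - s->cvq_cmd_out_buffer)) {
        return NULL;
    }

    void *buf = s->cvq_cmd_out_cursor;
    s->cvq_cmd_out_cursor += len;
    return buf;
}

static virtio_net_ctrl_ack *vhost_vdpa_net_cvq_in_alloc(VhostVDPAState *s)
{
    /* One status slot per in-flight command */
    return s->cvq_in_cursor++;
}

Then vhost_vdpa_net_load_cmd() could allocate from these instead of
taking the cursors as parameters.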

Thanks

> >
> > Maybe we can create a struct for them and then pass just this struct?
>
> Yes. Currently, the new version of vhost_vdpa_net_cvq_add() is only
> called from vhost_vdpa_net_load() at startup, so these cursors will not
> be used after startup.
>
> If needed, we can create a `VhostVDPACursor` as suggested by Eugenio.
>
> >
> > Thanks!
> >
> > > > +                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
> > > > +{
> > > > +    /* Buffers for the device */
> > > > +    const struct iovec out = {
> > > > +        .iov_base = *out_cursor,
> > > > +        .iov_len = out_len,
> > > > +    };
> > > > +    const struct iovec in = {
> > > > +        .iov_base = *in_cursor,
> > > > +        .iov_len = sizeof(virtio_net_ctrl_ack),
> > > > +    };
> > > > +    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > +    int r;
> > > > +
> > > > +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
> > > > +    if (unlikely(r != 0)) {
> > > > +        if (unlikely(r == -ENOSPC)) {
> > > > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> > > > +                          __func__);
> > > > +        }
> > > > +        return r;
> > > > +    }
> > > > +
> > > > +    /* Update the cursor */
> > > > +    *out_cursor += out_len;
> > > > +    *in_cursor += 1;
> > > > +
> > > > +    return 1;
> > > > +}
> > > > +
> > > >  /**
> > > >   * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
> > > >   * kicks the device and polls the device used buffers.
> > > > @@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
> > > >      return vhost_svq_poll(svq);
> > > >  }
> > > >
> > > > -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
> > > > -                                       uint8_t cmd, const void *data,
> > > > -                                       size_t data_size)
> > > > +
> > > > +/**
> > > > + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
> > > > + *
> > > > + * Return the number of elements added to SVQ on success.
> > > > + */
> > > > +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
> > > > +                                void **out_cursor, uint8_t class, uint8_t cmd,
> > > > +                                const void *data, size_t data_size,
> > > > +                                virtio_net_ctrl_ack **in_cursor)
> > > >  {
> > > >      const struct virtio_net_ctrl_hdr ctrl = {
> > > >          .class = class,
> > > >          .cmd = cmd,
> > > >      };
> > > >
> > > > -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
> > > > +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
> > > > +                          (*out_cursor - s->cvq_cmd_out_buffer));
> > > > +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
> > > > +                       (*out_cursor - s->cvq_cmd_out_buffer));
> > > >
> > > > -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
> > > > -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
> > > > +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
> > > > +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
> > > >
> > > > -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
> > > > -                                  sizeof(virtio_net_ctrl_ack));
> > > > +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
> > > > +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
> > > >  }
> > > >
> > > > -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
> > > > +/**
> > > > + * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
> > > > + *
> > > > + * Return the number of elements added to SVQ on success.
> > > > + */
> > > > +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
> > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > >  {
> > > >      uint64_t features = n->parent_obj.guest_features;
> > > >      if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
> > > > -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
> > > > -                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > -                                                  n->mac, sizeof(n->mac));
> > > > -        if (unlikely(dev_written < 0)) {
> > > > -            return dev_written;
> > > > -        }
> > > > -
> > > > -        return *s->status != VIRTIO_NET_OK;
> > > > +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
> > > > +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > +                                       n->mac, sizeof(n->mac), in_cursor);
> > > >      }
> > > >
> > > >      return 0;
> > > >  }
> > > >
> > > > -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
> > > > -                                  const VirtIONet *n)
> > > > +/**
> > > > + * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
> > > > + *
> > > > + * Return the number of elements added to SVQ on success.
> > > > + */
> > > > +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
> > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > >  {
> > > >      struct virtio_net_ctrl_mq mq;
> > > >      uint64_t features = n->parent_obj.guest_features;
> > > > -    ssize_t dev_written;
> > > >
> > > >      if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
> > > >          return 0;
> > > >      }
> > > >
> > > >      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
> > > > -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
> > > > -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
> > > > -                                          sizeof(mq));
> > > > -    if (unlikely(dev_written < 0)) {
> > > > -        return dev_written;
> > > > -    }
> > > > -
> > > > -    return *s->status != VIRTIO_NET_OK;
> > > > +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
> > > > +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
> > > > +                                   &mq, sizeof(mq), in_cursor);
> > > >  }
> > > >
> > > >  static int vhost_vdpa_net_load(NetClientState *nc)
> > > >  {
> > > >      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > +    VhostShadowVirtqueue *svq;
> > > > +    void *out_cursor;
> > > > +    virtio_net_ctrl_ack *in_cursor;
> > > >      struct vhost_vdpa *v = &s->vhost_vdpa;
> > > >      const VirtIONet *n;
> > > > -    int r;
> > > > +    ssize_t cmds_in_flight = 0, dev_written, r;
> > > >
> > > >      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > > >
> > > > @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
> > > >      }
> > > >
> > > >      n = VIRTIO_NET(v->dev->vdev);
> > > > -    r = vhost_vdpa_net_load_mac(s, n);
> > > > +    out_cursor = s->cvq_cmd_out_buffer;
> > > > +    in_cursor = s->status;
> > > > +
> > > > +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
> > > >      if (unlikely(r < 0)) {
> > > >          return r;
> > > >      }
> > > > -    r = vhost_vdpa_net_load_mq(s, n);
> > > > -    if (unlikely(r)) {
> > > > +    cmds_in_flight += r;
> > > > +
> > > > +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
> > > > +    if (unlikely(r < 0)) {
> > > >          return r;
> > > >      }
> > > > +    cmds_in_flight += r;
> > > > +
> > > > +    /* Poll for all used buffers from the device */
> > > > +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > +    while (cmds_in_flight > 0) {
> > > > +        /*
> > > > +         * We can poll here since we've had BQL from the time we sent the
> > > > +         * descriptor. Also, we need to take the answer before SVQ pulls
> > > > +         * by itself, when BQL is released
> > > > +         */
> > > > +        dev_written = vhost_svq_poll(svq);
> > >
> > > I'd tweak vhost_svq_poll to accept cmds_in_flight.
>
> That sounds great!
> I will refactor the code here and send the v2 patch after
> your patch.
>
> Thanks!
>
> > >
> > > Thanks
> > >
> > > > +
> > > > +        if (unlikely(!dev_written)) {
> > > > +            /*
> > > > +             * vhost_svq_poll() returns 0 when something goes wrong, such
> > > > +             * as QEMU waiting for too long or no used buffer being
> > > > +             * available from the device; there is no need to continue
> > > > +             * polling in this case.
> > > > +             */
> > > > +            return -EINVAL;
> > > > +        }
> > > > +
> > > > +        --cmds_in_flight;
> > > > +    }
> > > > +
> > > > +    /* Check the buffers written by device */
> > > > +    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
> > > > +         ++status) {
> > > > +        if (*status != VIRTIO_NET_OK) {
> > > > +            return -EINVAL;
> > > > +        }
> > > > +    }
> > > >
> > > >      return 0;
> > > >  }
> > > > --
> > > > 2.25.1
> > > >
> > >
> >
>

