On Wed, 22 Nov 2023 14:47:02 +0800, Jason Wang <[email protected]> wrote:
> On Wed, Nov 22, 2023 at 2:34 PM Xuan Zhuo <[email protected]> wrote:
> >
> > On Wed, 22 Nov 2023 14:03:32 +0800, Jason Wang <[email protected]> wrote:
> > > On Tue, Nov 21, 2023 at 2:27 PM Xuan Zhuo <[email protected]>
> > > wrote:
> > > >
> > > > On Thu, 16 Nov 2023 16:11:11 +0800, Jason Wang <[email protected]>
> > > > wrote:
> > > > > On Tue, Nov 14, 2023 at 7:31 PM Xuan Zhuo
> > > > > <[email protected]> wrote:
> > > > > >
> > > > > > introduce virtqueue_get_buf_ctx_dma() to collect the dma info when
> > > > > > get buf from virtio core for premapped mode.
> > > > > >
> > > > > > If the virtio queue is premapped mode, the virtio-net send buf may
> > > > > > have many desc. Every desc dma address need to be unmap. So here we
> > > > > > introduce a new helper to collect the dma address of the buffer from
> > > > > > the virtio core.
> > > > >
> > > > > So looking at vring_desc_extra, what we have right now is:
> > > > >
> > > > > struct vring_desc_extra {
> > > > > dma_addr_t addr; /* Descriptor DMA addr. */
> > > > > u32 len; /* Descriptor length. */
> > > > > u16 flags; /* Descriptor flags. */
> > > > > u16 next; /* The next desc state in a
> > > > > list. */
> > > > > };
> > > > >
> > > > > And sg is
> > > > >
> > > > > struct scatterlist {
> > > > > unsigned long page_link;
> > > > > unsigned int offset;
> > > > > unsigned int length;
> > > > > dma_addr_t dma_address;
> > > > > #ifdef CONFIG_NEED_SG_DMA_LENGTH
> > > > > unsigned int dma_length;
> > > > > #endif
> > > > > #ifdef CONFIG_NEED_SG_DMA_FLAGS
> > > > > unsigned int dma_flags;
> > > > > #endif
> > > > > };
> > > > >
> > > > > Would it better just store sg?
> > > >
> > > > Do you mean we expose the vring_desc_extra to dirver?
> > >
> > > Maybe or not, (or just us sg for desc_extra).
> > >
> > > Anyhow, any method needs to be benchmarked to see the performance impact.
> > >
> > > >
> > > > How about introducing such a new structure?
> > > >
> > > > struct virtio_dma_item {
> > > > dma_addr_t addr;
> > > > u32 len;
> > > > };
> > > >
> > > > struct virtio_dma_head {
> > > > u32 num;
> > > > u32 used;
> > > > struct virtio_dma_item items[];
> > > > };
> > > >
> > > > Then we just need to pass one pointer to the virtio.
> > >
> > > Just to make sure I understand here, where did we store those arrays?
> >
> >
> > On the stack, or we can reuse the memory space of sq->sg.
> >
> > dma = (struct virtio_dma_head*)sq->sg;
>
> Ok, we need some benchmark for sure.
>
> >
> >
> > >
> > > > num is used to pass the size of items
> > > > used is used to return the num used by virtio core.
> > > >
> > > > >
> > > > > More below
> > > > >
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <[email protected]>
> > > > > > ---
> > > > > > drivers/virtio/virtio_ring.c | 148
> > > > > > ++++++++++++++++++++++++++---------
> > > > > > include/linux/virtio.h | 2 +
> > > > > > 2 files changed, 115 insertions(+), 35 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c
> > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > index 51d8f3299c10..0b3caee4ef9d 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -362,6 +362,20 @@ static struct device *vring_dma_dev(const
> > > > > > struct vring_virtqueue *vq)
> > > > > > return vq->dma_dev;
> > > > > > }
> > > > > >
> > > > > > +static void store_dma_to_sg(struct scatterlist **sgp, dma_addr_t
> > > > > > addr, unsigned int length)
> > > > > > +{
> > > > > > + struct scatterlist *sg;
> > > > > > +
> > > > > > + sg = *sgp;
> > > > > > +
> > > > > > + sg->dma_address = addr;
> > > > > > + sg->length = length;
> > > > > > +
> > > > > > + sg = sg_next(sg);
> > > > > > +
> > > > > > + *sgp = sg;
> > > > > > +}
> > > > > > +
> > > > > > /* Map one sg entry. */
> > > > > > static int vring_map_one_sg(const struct vring_virtqueue *vq,
> > > > > > struct scatterlist *sg,
> > > > > > enum dma_data_direction direction,
> > > > > > dma_addr_t *addr)
> > > > > > @@ -441,12 +455,18 @@ static void virtqueue_init(struct
> > > > > > vring_virtqueue *vq, u32 num)
> > > > > > */
> > > > > >
> > > > > > static void vring_unmap_one_split_indirect(const struct
> > > > > > vring_virtqueue *vq,
> > > > > > - const struct vring_desc
> > > > > > *desc)
> > > > > > + const struct vring_desc
> > > > > > *desc,
> > > > > > + struct scatterlist **sg)
> > > > > > {
> > > > > > u16 flags;
> > > > > >
> > > > > > - if (!vq->do_unmap)
> > > > > > + if (!vq->do_unmap) {
> > > > > > + if (*sg)
> > > > >
> > > > > Can we simply move the
> > > > >
> > > > > if (*sg) to store_dma_to_sg()?
> > > >
> > > > OK
> > > >
> > > >
> > > > >
> > > > > > + store_dma_to_sg(sg,
> > > > > > +
> > > > > > virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > > > > > +
> > > > > > virtio32_to_cpu(vq->vq.vdev, desc->len));
> > > > > > return;
> > > > > > + }
> > > > > >
> > > > > > flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > > > > >
> > > > > > @@ -458,7 +478,7 @@ static void
> > > > > > vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> > > > > > }
> > > > > >
> > > > > > static unsigned int vring_unmap_one_split(const struct
> > > > > > vring_virtqueue *vq,
> > > > > > - unsigned int i)
> > > > > > + unsigned int i, struct
> > > > > > scatterlist **sg)
> > > > > > {
> > > > > > struct vring_desc_extra *extra = vq->split.desc_extra;
> > > > > > u16 flags;
> > > > > > @@ -475,8 +495,11 @@ static unsigned int
> > > > > > vring_unmap_one_split(const struct vring_virtqueue *vq,
> > > > > > (flags & VRING_DESC_F_WRITE) ?
> > > > > > DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > } else {
> > > > > > - if (!vq->do_unmap)
> > > > > > + if (!vq->do_unmap) {
> > > > >
> > > > > The:
> > > > >
> > > > > else {
> > > > > if {
> > > > > if {
> > > > > }
> > > > >
> > > > > seems odd. I think I would prefer
> > > > >
> > > > > if (flags & VRING_DESC_F_INDIRECT) {
> > > > > } else if (!vq->do_unmap) {
> > > > > } else {
> > > > > }
> > > >
> > > >
> > > > Will fix.
> > > >
> > > >
> > > > >
> > > > > here
> > > > >
> > > > > Btw, I really think do_unmap is not a good name, we probably need to
> > > > > rename it as "unmap_desc".
> > > >
> > > > How about a separate patch for this?
> > > >
> > > > >
> > > > >
> > > > > > + if (*sg)
> > > > > > + store_dma_to_sg(sg, extra[i].addr,
> > > > > > extra[i].len);
> > > > >
> > > > > In which case we need to unmap by driver but we don't need a dma
> > > > > address?
> > > >
> > > > Sorry, do not get it.
> > >
> > > I meant, I don't get how we can reach to this check.
> >
> > This is the main way. I think you miss something.
>
> Or maybe you can explain the case where vq->do_unmap is false but *sg is NULL?
If vq->do_unmap is false, that means we are in premapped mode or use_dma_api is false.
If the *sg is null, that means that the driver calls the API
virtqueue_get_buf_ctx or virtqueue_get_buf.
If the *sg is not null, that means that the driver calls the API
void *virtqueue_get_buf_ctx_dma(struct virtqueue *_vq, unsigned int
*len,
struct scatterlist *sg, void **ctx);
Thanks.
>
> >
> >
> > >
> > > >
> > > > >
> > > > > > goto out;
> > > > > > + }
> > > > > >
> > > > > > dma_unmap_page(vring_dma_dev(vq),
> > > > > > extra[i].addr,
> > > > > > @@ -717,10 +740,10 @@ static inline int virtqueue_add_split(struct
> > > > > > virtqueue *_vq,
> > > > > > if (i == err_idx)
> > > > > > break;
> > > > > > if (indirect) {
> > > > > > - vring_unmap_one_split_indirect(vq,
> > > > > > &desc[i]);
> > > > > > + vring_unmap_one_split_indirect(vq,
> > > > > > &desc[i], NULL);
> > > > > > i = virtio16_to_cpu(_vq->vdev,
> > > > > > desc[i].next);
> > > > > > } else
> > > > > > - i = vring_unmap_one_split(vq, i);
> > > > > > + i = vring_unmap_one_split(vq, i, NULL);
> > > > > > }
> > > > > >
> > > > > > free_indirect:
> > > > > > @@ -763,7 +786,7 @@ static bool virtqueue_kick_prepare_split(struct
> > > > > > virtqueue *_vq)
> > > > > > }
> > > > > >
> > > > > > static void detach_buf_split(struct vring_virtqueue *vq, unsigned
> > > > > > int head,
> > > > > > - void **ctx)
> > > > > > + struct scatterlist *sg, void **ctx)
> > > > > > {
> > > > > > unsigned int i, j;
> > > > > > __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev,
> > > > > > VRING_DESC_F_NEXT);
> > > > > > @@ -775,12 +798,12 @@ static void detach_buf_split(struct
> > > > > > vring_virtqueue *vq, unsigned int head,
> > > > > > i = head;
> > > > > >
> > > > > > while (vq->split.vring.desc[i].flags & nextflag) {
> > > > > > - vring_unmap_one_split(vq, i);
> > > > > > + vring_unmap_one_split(vq, i, &sg);
> > > > > > i = vq->split.desc_extra[i].next;
> > > > > > vq->vq.num_free++;
> > > > > > }
> > > > > >
> > > > > > - vring_unmap_one_split(vq, i);
> > > > > > + vring_unmap_one_split(vq, i, &sg);
> > > > > > vq->split.desc_extra[i].next = vq->free_head;
> > > > > > vq->free_head = head;
> > > > > >
> > > > > > @@ -794,7 +817,7 @@ static void detach_buf_split(struct
> > > > > > vring_virtqueue *vq, unsigned int head,
> > > > > >
> > > > > > /* Free the indirect table, if any, now that it's
> > > > > > unmapped. */
> > > > > > if (!indir_desc)
> > > > > > - return;
> > > > > > + goto end;
> > > > > >
> > > > > > len = vq->split.desc_extra[head].len;
> > > > > >
> > > > > > @@ -802,9 +825,10 @@ static void detach_buf_split(struct
> > > > > > vring_virtqueue *vq, unsigned int head,
> > > > > > VRING_DESC_F_INDIRECT));
> > > > > > BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> > > > > >
> > > > > > - if (vq->do_unmap) {
> > > > > > + if (vq->do_unmap || sg) {
> > > > > > for (j = 0; j < len / sizeof(struct
> > > > > > vring_desc); j++)
> > > > > > - vring_unmap_one_split_indirect(vq,
> > > > > > &indir_desc[j]);
> > > > > > + vring_unmap_one_split_indirect(vq,
> > > > > > &indir_desc[j], &sg);
> > > > > > +
> > > > > > }
> > > > > >
> > > > > > kfree(indir_desc);
> > > > > > @@ -812,6 +836,11 @@ static void detach_buf_split(struct
> > > > > > vring_virtqueue *vq, unsigned int head,
> > > > > > } else if (ctx) {
> > > > > > *ctx = vq->split.desc_state[head].indir_desc;
> > > > > > }
> > > > > > +
> > > > > > +end:
> > > > > > + /* sg point to the next. So we mark the last one as the
> > > > > > end. */
> > > > > > + if (!vq->do_unmap && sg)
> > > > > > + sg_mark_end(sg - 1);
> > > > > > }
> > > > > >
> > > > > > static bool more_used_split(const struct vring_virtqueue *vq)
> > > > > > @@ -822,6 +851,7 @@ static bool more_used_split(const struct
> > > > > > vring_virtqueue *vq)
> > > > > >
> > > > > > static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > > > > > unsigned int *len,
> > > > > > + struct scatterlist *sg,
> > > > > > void **ctx)
> > > > > > {
> > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > @@ -862,7 +892,7 @@ static void *virtqueue_get_buf_ctx_split(struct
> > > > > > virtqueue *_vq,
> > > > > >
> > > > > > /* detach_buf_split clears data, so grab it now. */
> > > > > > ret = vq->split.desc_state[i].data;
> > > > > > - detach_buf_split(vq, i, ctx);
> > > > > > + detach_buf_split(vq, i, sg, ctx);
> > > > > > vq->last_used_idx++;
> > > > > > /* If we expect an interrupt for the next entry, tell host
> > > > > > * by writing event index and flush out the write before
> > > > > > @@ -984,7 +1014,7 @@ static void
> > > > > > *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
> > > > > > continue;
> > > > > > /* detach_buf_split clears data, so grab it now. */
> > > > > > buf = vq->split.desc_state[i].data;
> > > > > > - detach_buf_split(vq, i, NULL);
> > > > > > + detach_buf_split(vq, i, NULL, NULL);
> > > > > > vq->split.avail_idx_shadow--;
> > > > > > vq->split.vring.avail->idx =
> > > > > > cpu_to_virtio16(_vq->vdev,
> > > > > > vq->split.avail_idx_shadow);
> > > > > > @@ -1221,7 +1251,8 @@ static u16 packed_last_used(u16 last_used_idx)
> > > > > > }
> > > > > >
> > > > > > static void vring_unmap_extra_packed(const struct vring_virtqueue
> > > > > > *vq,
> > > > > > - const struct vring_desc_extra
> > > > > > *extra)
> > > > > > + const struct vring_desc_extra
> > > > > > *extra,
> > > > > > + struct scatterlist **sg)
> > > > > > {
> > > > > > u16 flags;
> > > > > >
> > > > > > @@ -1236,8 +1267,11 @@ static void vring_unmap_extra_packed(const
> > > > > > struct vring_virtqueue *vq,
> > > > > > (flags & VRING_DESC_F_WRITE) ?
> > > > > > DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > > > > > } else {
> > > > > > - if (!vq->do_unmap)
> > > > > > + if (!vq->do_unmap) {
> > > > > > + if (*sg)
> > > > > > + store_dma_to_sg(sg, extra->addr,
> > > > > > extra->len);
> > > > > > return;
> > > > > > + }
> > > > > >
> > > > > > dma_unmap_page(vring_dma_dev(vq),
> > > > > > extra->addr, extra->len,
> > > > > > @@ -1247,12 +1281,17 @@ static void vring_unmap_extra_packed(const
> > > > > > struct vring_virtqueue *vq,
> > > > > > }
> > > > > >
> > > > > > static void vring_unmap_desc_packed(const struct vring_virtqueue
> > > > > > *vq,
> > > > > > - const struct vring_packed_desc
> > > > > > *desc)
> > > > > > + const struct vring_packed_desc
> > > > > > *desc,
> > > > > > + struct scatterlist **sg)
> > > > > > {
> > > > >
> > > > > Interesting, I think this is only needed for indirect descriptors?
> > > > >
> > > > > If yes, why do we care about the dma addresses of indirect
> > > > > descriptors?
> > > > > If not, it's a bug that we should use desc_extra, otherwise it's a
> > > > > device-triggerable unmap which has security implications (we need to
> > > > > use vring_extra in this case).
> > > >
> > > > Sorry, I do not get.
> > > >
> > > > indirect desc(alloc by virtio core)
> > > > virtio-desc ------> | | ------------------------------------->
> > > > dma address of buffer
> > > > | | ------------------------------------->
> > > > dma address of buffer
> > > > | | ------------------------------------->
> > > > dma address of buffer
> > > > | | ------------------------------------->
> > > > dma address of buffer
> > > > | | ------------------------------------->
> > > > dma address of buffer
> > > > | | ------------------------------------->
> > > > dma address of buffer
> > > >
> > > > For vring_unmap_desc_packed the desc is the indirect desc(alloc by
> > > > virtio core,
> > > > not the virtio desc), which record the dma address of buffer that is
> > > > passed by
> > > > the driver. Here we need to record the dma address to back to the
> > > > driver.
> > >
> > > I meant why should we return this to the driver? It is allocated by
> > > the vitio core and should be transparent to the driver.
> >
> > Yes.
> >
> > So here, we collect the buffer dma address not the address of the indirect
> > desc
> > allocated by the virtio core.
> >
> > I also think you miss something.
>
> Probably, I guess it works since it is only used for unmapping
> indirect descriptors. So indirect descriptors using stream DMA mapping
> where we don't need vring_extra.
>
> Renaming it to something like vring_unmap_packed_indirect() might be better.
YES.
Thanks.
>
> Thanks
>
> >
> > static void detach_buf_packed(struct vring_virtqueue *vq,
> > unsigned int id, void **ctx)
> > {
> > struct vring_desc_state_packed *state = NULL;
> > struct vring_packed_desc *desc;
> > unsigned int i, curr;
> >
> > state = &vq->packed.desc_state[id];
> >
> > /* Clear data ptr. */
> > state->data = NULL;
> >
> > vq->packed.desc_extra[state->last].next = vq->free_head;
> > vq->free_head = id;
> > vq->vq.num_free += state->num;
> >
> > if (unlikely(vq->do_unmap)) {
> > curr = id;
> > for (i = 0; i < state->num; i++) {
> > vring_unmap_extra_packed(vq,
> >
> > &vq->packed.desc_extra[curr]);
> > curr = vq->packed.desc_extra[curr].next;
> > }
> > }
> >
> > if (vq->indirect) {
> > u32 len;
> >
> > /* Free the indirect table, if any, now that it's
> > unmapped. */
> > desc = state->indir_desc;
> > if (!desc)
> > return;
> >
> > if (vq->do_unmap) {
> > len = vq->packed.desc_extra[id].len;
> > for (i = 0; i < len / sizeof(struct
> > vring_packed_desc);
> > i++)
> > --> vring_unmap_desc_packed(vq,
> > &desc[i]);
> > }
> > kfree(desc);
> > state->indir_desc = NULL;
> > } else if (ctx) {
> > *ctx = state->indir_desc;
> > }
> > }
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> >
>