Hi Yuan,

This is a first review; I will certainly have more comments later.
On 6/2/21 10:31 AM, Yuan Wang wrote:
> This patch implements asynchronous dequeue data path for split ring.
> A new asynchronous dequeue function is introduced. With this function,
> the application can try to receive packets from the guest with
> offloading large copies to the DMA engine, thus saving precious CPU
> cycles.

Do you have any numbers to share?

> Signed-off-by: Wenwu Ma <wenwux...@intel.com>
> Signed-off-by: Yuan Wang <yuanx.w...@intel.com>
> Signed-off-by: Jiayu Hu <jiayu...@intel.com>
> ---
>  doc/guides/prog_guide/vhost_lib.rst |  10 +
>  examples/vhost/ioat.c               |  30 +-
>  examples/vhost/ioat.h               |   3 +
>  examples/vhost/main.c               |  60 +--
>  lib/vhost/rte_vhost_async.h         |  44 ++-
>  lib/vhost/version.map               |   3 +
>  lib/vhost/virtio_net.c              | 549 ++++++++++++++++++++++++++++
>  7 files changed, 664 insertions(+), 35 deletions(-)

Please split the patch into multiple parts. At the least, don't mix
example and lib changes in the same patch.

> diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
> index 6b7206bc1d..785ab0fb34 100644
> --- a/doc/guides/prog_guide/vhost_lib.rst
> +++ b/doc/guides/prog_guide/vhost_lib.rst
> @@ -281,6 +281,16 @@ The following is an overview of some key Vhost API functions:
>    Poll enqueue completion status from async data path. Completed packets
>    are returned to applications through ``pkts``.
>  
> +* ``rte_vhost_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts, count, nr_inflight)``

The function name should contain "async".

BTW, I think we should also rename the APIs below while they are still
experimental, to highlight they are async-related:
 rte_vhost_submit_enqueue_burst
 rte_vhost_poll_enqueue_completed

> +
> +  Try to receive packets from the guest with offloading large packets
> +  to the DMA engine. Successfully dequeued packets are transfer
> +  completed and returned in ``pkts``. But there may be other packets
> +  that are sent from the guest but being transferred by the DMA engine,
> +  called in-flight packets. This function will return in-flight packets
> +  only after the DMA engine finishes transferring. The amount of
> +  in-flight packets by now is returned in ``nr_inflight``.
> +
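Also, to make sure I understand the semantics, below is how I would
expect an application to consume this API. Just a sketch: I am guessing
the exact prototype from the doc above (uint16_t return, nr_inflight
returned through an int pointer), and forward_pkt() is a placeholder
for the application's forwarding path:

	#include <rte_mbuf.h>
	#include <rte_vhost_async.h>

	#define VIRTIO_TXQ 1	/* guest TX ring, as in examples/vhost */
	#define MAX_PKT_BURST 32

	static void forward_pkt(struct rte_mbuf *m);	/* app specific */

	static void
	poll_guest_tx(int vid, struct rte_mempool *mbuf_pool)
	{
		struct rte_mbuf *pkts[MAX_PKT_BURST];
		int nr_inflight = 0;
		uint16_t nr_rx, i;

		nr_rx = rte_vhost_try_dequeue_burst(vid, VIRTIO_TXQ,
				mbuf_pool, pkts, MAX_PKT_BURST, &nr_inflight);

		/* Packets in pkts[] have fully completed their DMA
		 * copies and are safe to process. */
		for (i = 0; i < nr_rx; i++)
			forward_pkt(pkts[i]);

		/* A non-zero nr_inflight means some guest packets are
		 * still being copied by the DMA engine; the queue has
		 * to be polled again later so they can complete and be
		 * returned. */
	}

If that matches your intent, it may be worth adding such a snippet to
the programmer's guide.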
>  Vhost-user Implementations
>  --------------------------
>  
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> index 2a2c2d7202..236306c9c7 100644
> --- a/examples/vhost/ioat.c
> +++ b/examples/vhost/ioat.c
> @@ -17,7 +17,6 @@ struct packet_tracker {
>  	unsigned short next_read;
>  	unsigned short next_write;
>  	unsigned short last_remain;
> -	unsigned short ioat_space;
>  };
>  
>  struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> @@ -61,18 +60,30 @@ open_ioat(const char *value)
>  		goto out;
>  	}
>  	while (i < args_nr) {
> +		char *txd, *rxd;
> +		bool is_txd;
>  		char *arg_temp = dma_arg[i];
>  		uint8_t sub_nr;
> +
>  		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
>  		if (sub_nr != 2) {
>  			ret = -1;
>  			goto out;
>  		}
>  
> -		start = strstr(ptrs[0], "txd");
> -		if (start == NULL) {
> +		txd = strstr(ptrs[0], "txd");
> +		rxd = strstr(ptrs[0], "rxd");
> +		if (txd == NULL && rxd == NULL) {
>  			ret = -1;
>  			goto out;
> +		} else if (txd) {
> +			is_txd = true;
> +			start = txd;
> +			ret |= ASYNC_RX_VHOST;
> +		} else {
> +			is_txd = false;
> +			start = rxd;
> +			ret |= ASYNC_TX_VHOST;
>  		}
>  
>  		start += 3;
> @@ -82,7 +93,8 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  
> -		vring_id = 0 + VIRTIO_RXQ;
> +		vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
> +
>  		if (rte_pci_addr_parse(ptrs[1],
>  				&(dma_info + vid)->dmas[vring_id].addr) < 0) {
>  			ret = -1;
> @@ -113,7 +125,6 @@ open_ioat(const char *value)
>  			goto out;
>  		}
>  		rte_rawdev_start(dev_id);
> -		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE - 1;
>  		dma_info->nr++;
>  		i++;
>  	}
> @@ -128,7 +139,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  		struct rte_vhost_async_status *opaque_data, uint16_t count)
>  {
>  	uint32_t i_desc;
> -	uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
> +	uint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;

It looks broken with regard to multiqueue (it was already broken before
this patch). In open_ioat(), only dma_bind[vid].dmas[VIRTIO_RXQ] and
dma_bind[vid].dmas[VIRTIO_TXQ] are set.

As the application does not seem to support multiqueue, it may be a
good idea to check the queue_id value before using it.
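Something like the below, for example (only a sketch; it would live in
ioat.c, which already has the needed includes, and VHOST_DATA is the
log type this example already uses):

	/* Reject queue ids that open_ioat() never configures, instead
	 * of indexing dmas[] with an arbitrary value. */
	static int
	check_async_queue_id(uint16_t queue_id)
	{
		if (queue_id != VIRTIO_RXQ && queue_id != VIRTIO_TXQ) {
			RTE_LOG(ERR, VHOST_DATA,
				"unexpected queue id %u, only one queue pair is supported\n",
				queue_id);
			return -1;
		}
		return 0;
	}

ioat_transfer_data_cb() and ioat_check_completed_copies_cb() could then
call it first and bail out early on error.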
>  	struct rte_vhost_iov_iter *src = NULL;
>  	struct rte_vhost_iov_iter *dst = NULL;
>  	unsigned long i_seg;
> @@ -140,7 +151,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			src = descs[i_desc].src;
>  			dst = descs[i_desc].dst;
>  			i_seg = 0;
> -			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
> +			if (rte_ioat_burst_capacity(dev_id) < src->nr_segs)

This change should be in a dedicated patch; it is not related to
dequeue support.

>  				break;
>  			while (i_seg < src->nr_segs) {
>  				rte_ioat_enqueue_copy(dev_id,
> @@ -155,7 +166,6 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
>  			}
>  			write &= mask;
>  			cb_tracker[dev_id].size_track[write] = src->nr_segs;
> -			cb_tracker[dev_id].ioat_space -= src->nr_segs;
>  			write++;
>  		}
>  	} else {
> @@ -181,8 +191,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
>  		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
>  		unsigned short i;
>  
> -		uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
> -				+ VIRTIO_RXQ].dev_id;
> +		uint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;
>  		n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump,
>  				dump);
>  		if (n_seg < 0) {
>  			RTE_LOG(ERR,
> @@ -194,7 +203,6 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
>  		if (n_seg == 0)
>  			return 0;
>  
> -		cb_tracker[dev_id].ioat_space += n_seg;
>  		n_seg += cb_tracker[dev_id].last_remain;
>  
>  		read = cb_tracker[dev_id].next_read;
> diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
> index 1aa28ed6a3..db7acefc02 100644
> --- a/examples/vhost/ioat.h
> +++ b/examples/vhost/ioat.h
> @@ -13,6 +13,9 @@
>  #define IOAT_RING_SIZE 4096
>  #define MAX_ENQUEUED_SIZE 4096
>  
> +#define ASYNC_RX_VHOST	1
> +#define ASYNC_TX_VHOST	2
> +
>  struct dma_info {
>  	struct rte_pci_addr addr;
>  	uint16_t dev_id;
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index d2179eadb9..a5662a1a91 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -93,7 +93,8 @@ static int client_mode;
>  
>  static int builtin_net_driver;
>  
> -static int async_vhost_driver;
> +static int async_rx_vhost_driver;
> +static int async_tx_vhost_driver;
>  
>  static char *dma_type;
>  
> @@ -671,13 +672,17 @@ us_vhost_parse_args(int argc, char **argv)
>  			break;
>  
>  		case OPT_DMAS_NUM:
> -			if (open_dma(optarg) == -1) {
> +			ret = open_dma(optarg);
> +			if (ret == -1) {
>  				RTE_LOG(INFO, VHOST_CONFIG,
>  					"Wrong DMA args\n");
>  				us_vhost_usage(prgname);
>  				return -1;
>  			}
> -			async_vhost_driver = 1;
> +			if (ret & ASYNC_RX_VHOST)
> +				async_rx_vhost_driver = 1;
> +			if (ret & ASYNC_TX_VHOST)
> +				async_tx_vhost_driver = 1;
>  			break;
>  
>  		case OPT_CLIENT_NUM:
> @@ -887,7 +892,7 @@ drain_vhost(struct vhost_dev *vdev)
>  
>  	if (builtin_net_driver) {
>  		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> -	} else if (async_vhost_driver) {
> +	} else if (async_rx_vhost_driver) {

I think we should consider having ops for async and sync instead of all
these if/else blocks. It could be refactored as a preliminary patch for
this series; see the sketch at the end of this mail.

>  		uint32_t cpu_cpl_nr = 0;
>  		uint16_t enqueue_fail = 0;
>  		struct rte_mbuf *m_cpu_cpl[nr_xmit];
> @@ -914,7 +919,7 @@ drain_vhost(struct vhost_dev *vdev)
>  			__ATOMIC_SEQ_CST);
>  	}
>  
> -	if (!async_vhost_driver)
> +	if (!async_rx_vhost_driver)
>  		free_pkts(m, nr_xmit);
>  }
> 
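To illustrate the ops idea for drain_vhost() mentioned above, something
along these lines (hypothetical names; it assumes adding a pkt_ops
pointer to struct vhost_dev, and vs_enqueue_pkts() is the existing
builtin helper from this example):

	/* Select the datapath once per device instead of testing
	 * builtin/async/sync flags on every burst. */
	struct vhost_queue_ops {
		uint16_t (*enqueue_pkt_burst)(struct vhost_dev *vdev,
				uint16_t queue_id,
				struct rte_mbuf **pkts, uint32_t count);
	};

	static uint16_t
	builtin_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
			struct rte_mbuf **pkts, uint32_t count)
	{
		return vs_enqueue_pkts(vdev, queue_id, pkts, count);
	}

	static const struct vhost_queue_ops builtin_ops = {
		.enqueue_pkt_burst = builtin_enqueue_pkts,
	};

	/* new_device() would set vdev->pkt_ops to &builtin_ops, to an
	 * async variant or to a sync variant, and drain_vhost()
	 * becomes a single indirect call:
	 *
	 *	ret = vdev->pkt_ops->enqueue_pkt_burst(vdev, VIRTIO_RXQ,
	 *			m, nr_xmit);
	 */

The same could be done for the dequeue side once this series adds it.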