On Thu, 22 Oct 2015 20:09:50 +0800
Huawei Xie <huawei.xie@intel.com> wrote:

> Changes in v4:
> - move virtio_xmit_cleanup ahead to free descriptors earlier
> 
> Changes in v3:
> - Remove return at the end of void function
> - Remove always_inline attribute for virtio_xmit_cleanup
> Bulk free of mbufs when cleaning the used ring.
> The shift operation on idx could be avoided if vq_free_cnt counted
> free slots rather than free descriptors.
> 
> TODO: rearrange the vq data structure, packing the stats variables together
> so that one vector instruction could update all of them.
> 
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> ---
>  drivers/net/virtio/virtio_ethdev.h      |  3 ++
>  drivers/net/virtio/virtio_rxtx_simple.c | 93 +++++++++++++++++++++++++++++++++
>  2 files changed, 96 insertions(+)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index d7797ab..ae2d47d 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>               uint16_t nb_pkts);
>  
> +uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
> +             uint16_t nb_pkts);
> +
>  /*
>   * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
>   * frames larger than 1514 bytes. We do not yet support software LRO
> diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
> index ef17562..79b4f7f 100644
> --- a/drivers/net/virtio/virtio_rxtx_simple.c
> +++ b/drivers/net/virtio/virtio_rxtx_simple.c
> @@ -288,6 +288,99 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>       return nb_pkts_received;
>  }
>  
> +#define VIRTIO_TX_FREE_THRESH 32
> +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
> +#define VIRTIO_TX_FREE_NR 32
> +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
> +static inline void
> +virtio_xmit_cleanup(struct virtqueue *vq)
> +{
> +     uint16_t i, desc_idx;
> +     int nb_free = 0;
> +     struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
> +
> +     desc_idx = (uint16_t)(vq->vq_used_cons_idx &
> +             ((vq->vq_nentries >> 1) - 1));
> +     free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
> +     nb_free = 1;
> +
> +     for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
> +             m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
> +             if (likely(m->pool == free[0]->pool))
> +                     free[nb_free++] = m;
> +             else {
> +                     rte_mempool_put_bulk(free[0]->pool, (void **)free,
> +                             nb_free);
> +                     free[0] = m;
> +                     nb_free = 1;
> +             }
> +     }
> +
> +     rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
> +     vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
> +     vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
> +}

I think you need to handle the mbuf refcount here; below is a similar patch
for ixgbe, followed by a rough sketch of the same approach applied to
virtio_xmit_cleanup.

Subject: ixgbe: speed up transmit

Coalesce transmit buffers and put them back into the pool
in one burst.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -120,12 +120,16 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
  * Check for descriptors with their DD bit set and free mbufs.
  * Return the total number of buffers freed.
  */
+#define TX_FREE_BULK 32
+
 static inline int __attribute__((always_inline))
 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
 {
        struct ixgbe_tx_entry *txep;
        uint32_t status;
-       int i;
+       int i, n = 0;
+       struct rte_mempool *txpool = NULL;
+       struct rte_mbuf *free_list[TX_FREE_BULK];

        /* check DD bit on threshold descriptor */
        status = txq->tx_ring[txq->tx_next_dd].wb.status;
@@ -138,20 +142,29 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue
         */
        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

-       /* free buffers one at a time */
-       if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
-               for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
-                       txep->mbuf->next = NULL;
-                       rte_mempool_put(txep->mbuf->pool, txep->mbuf);
-                       txep->mbuf = NULL;
-               }
-       } else {
-               for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
-                       rte_pktmbuf_free_seg(txep->mbuf);
-                       txep->mbuf = NULL;
+       for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
+               struct rte_mbuf *m;
+
+               /* free buffers one at a time */
+               m = __rte_pktmbuf_prefree_seg(txep->mbuf);
+               txep->mbuf = NULL;
+
+               if (m == NULL)  /* still referenced; cannot be pooled yet */
+                       continue;
+
+               if (n >= TX_FREE_BULK ||
+                   (n > 0 && m->pool != txpool)) {
+                       rte_mempool_put_bulk(txpool, (void **)free_list, n);
+                       n = 0;
                }
+
+               txpool = m->pool;
+               free_list[n++] = m;
        }

+       if (n > 0)
+               rte_mempool_put_bulk(txpool, (void **)free_list, n);
+
        /* buffers were freed, update counters */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
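
Applied to virtio_xmit_cleanup, the same idea would look roughly like
this (untested sketch; assumes single-segment mbufs, as in the simple
TX path, and the __rte_pktmbuf_prefree_seg() semantics used above):

static inline void
virtio_xmit_cleanup(struct virtqueue *vq)
{
	uint16_t i, desc_idx;
	int nb_free = 0;
	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];

	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
		((vq->vq_nentries >> 1) - 1));

	for (i = 0; i < VIRTIO_TX_FREE_NR; i++) {
		m = __rte_pktmbuf_prefree_seg(
			(struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie);
		if (m == NULL)	/* still referenced; cannot be pooled yet */
			continue;

		/* flush the batch when it is full or the pool changes */
		if (nb_free > 0 && (nb_free == VIRTIO_TX_MAX_FREE_BUF_SZ ||
				m->pool != free[0]->pool)) {
			rte_mempool_put_bulk(free[0]->pool,
				(void **)free, nb_free);
			nb_free = 0;
		}
		free[nb_free++] = m;
	}

	if (nb_free > 0)
		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);

	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
}

The bulk put is where the speedup comes from: a single
rte_mempool_put_bulk() amortizes the mempool ring access over the whole
batch instead of paying it once per mbuf.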
