On 10/23/2015 12:57 AM, Stephen Hemminger wrote:
> On Thu, 22 Oct 2015 20:09:50 +0800
> Huawei Xie <huawei.xie at intel.com> wrote:
>
>> Changes in v4:
>> - move virtio_xmit_cleanup ahead to free descriptors earlier
>>
>> Changes in v3:
>> - Remove return at the end of void function
>> - Remove always_inline attribute for virtio_xmit_cleanup
>>
>> bulk free of mbufs when clean used ring.
>> shift operation of idx could be saved if vq_free_cnt means
>> free slots rather than free descriptors.
>>
>> TODO: rearrange vq data structure, pack the stats var together so that we
>> could use one vec instruction to update all of them.
>>
>> Signed-off-by: Huawei Xie <huawei.xie at intel.com>
>> ---
>>  drivers/net/virtio/virtio_ethdev.h      |  3 ++
>>  drivers/net/virtio/virtio_rxtx_simple.c | 93 +++++++++++++++++++++++++++++++++
>>  2 files changed, 96 insertions(+)
>>
>> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
>> index d7797ab..ae2d47d 100644
>> --- a/drivers/net/virtio/virtio_ethdev.h
>> +++ b/drivers/net/virtio/virtio_ethdev.h
>> @@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>>  uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>  	uint16_t nb_pkts);
>>
>> +uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
>> +	uint16_t nb_pkts);
>> +
>>  /*
>>   * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
>>   * frames larger than 1514 bytes. We do not yet support software LRO
>> diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
>> index ef17562..79b4f7f 100644
>> --- a/drivers/net/virtio/virtio_rxtx_simple.c
>> +++ b/drivers/net/virtio/virtio_rxtx_simple.c
>> @@ -288,6 +288,99 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>  	return nb_pkts_received;
>>  }
>>
>> +#define VIRTIO_TX_FREE_THRESH 32
>> +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
>> +#define VIRTIO_TX_FREE_NR 32
>> +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
>> +static inline void
>> +virtio_xmit_cleanup(struct virtqueue *vq)
>> +{
>> +	uint16_t i, desc_idx;
>> +	int nb_free = 0;
>> +	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
>> +
>> +	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
>> +		((vq->vq_nentries >> 1) - 1));
>> +	free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
>> +	nb_free = 1;
>> +
>> +	for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
>> +		m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
>> +		if (likely(m->pool == free[0]->pool))
>> +			free[nb_free++] = m;
>> +		else {
>> +			rte_mempool_put_bulk(free[0]->pool, (void **)free,
>> +				nb_free);
>> +			free[0] = m;
>> +			nb_free = 1;
>> +		}
>> +	}
>> +
>> +	rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
>> +	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
>> +	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
>> +}
> I think you need to handle refcount, here is a similar patch
> for ixgbe.

ok, like this:

m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
if (likely(m != NULL)) {
	...
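Or, folding the refcount handling in the same way your ixgbe patch below
does, the whole of virtio_xmit_cleanup() could become something like the
following. This is only an untested sketch: the __rte_pktmbuf_prefree_seg()
call is borrowed from your patch, not something the virtio simple path does
today, and the rest keeps the existing bulk-free structure.

static inline void
virtio_xmit_cleanup(struct virtqueue *vq)
{
	uint16_t i, desc_idx;
	int nb_free = 0;
	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];

	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
		((vq->vq_nentries >> 1) - 1));

	for (i = 0; i < VIRTIO_TX_FREE_NR; i++) {
		m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
		if (unlikely(m == NULL))
			continue;

		/* NULL return means the mbuf is still referenced
		 * elsewhere and must not go back to the pool yet */
		m = __rte_pktmbuf_prefree_seg(m);
		if (unlikely(m == NULL))
			continue;

		/* flush the batch when it is full or the pool changes */
		if (nb_free >= VIRTIO_TX_MAX_FREE_BUF_SZ ||
		    (nb_free > 0 && m->pool != free[0]->pool)) {
			rte_mempool_put_bulk(free[0]->pool,
				(void **)free, nb_free);
			nb_free = 0;
		}
		free[nb_free++] = m;
	}

	if (nb_free > 0)
		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);

	/* descriptors are reclaimed whether or not the mbuf was freed */
	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
}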
>
> Subject: ixgbe: speed up transmit
>
> Coalesce transmit buffers and put them back into the pool
> in one burst.
>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
>
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -120,12 +120,16 @@ rte_rxmbuf_alloc(struct rte_mempool *mp)
>   * Check for descriptors with their DD bit set and free mbufs.
>   * Return the total number of buffers freed.
>   */
> +#define TX_FREE_BULK 32
> +
>  static inline int __attribute__((always_inline))
>  ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
>  {
>  	struct ixgbe_tx_entry *txep;
>  	uint32_t status;
> -	int i;
> +	int i, n = 0;
> +	struct rte_mempool *txpool = NULL;
> +	struct rte_mbuf *free_list[TX_FREE_BULK];
>
>  	/* check DD bit on threshold descriptor */
>  	status = txq->tx_ring[txq->tx_next_dd].wb.status;
> @@ -138,20 +142,26 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue
>  	 */
>  	txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
>
> -	/* free buffers one at a time */
> -	if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
> -		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
> -			txep->mbuf->next = NULL;
> -			rte_mempool_put(txep->mbuf->pool, txep->mbuf);
> -			txep->mbuf = NULL;
> -		}
> -	} else {
> -		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
> -			rte_pktmbuf_free_seg(txep->mbuf);
> -			txep->mbuf = NULL;
> +	for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
> +		struct rte_mbuf *m;
> +
> +		/* free buffers one at a time */
> +		m = __rte_pktmbuf_prefree_seg(txep->mbuf);
> +		txep->mbuf = NULL;
> +
> +		if (n >= TX_FREE_BULK ||

check whether m is NULL here: __rte_pktmbuf_prefree_seg() returns NULL
when the mbuf is still referenced, and m->pool is dereferenced
unconditionally a few lines below.

> +		    (n > 0 && m->pool != txpool)) {
> +			rte_mempool_put_bulk(txpool, (void **)free_list, n);
> +			n = 0;
>  		}
> +
> +		txpool = m->pool;
> +		free_list[n++] = m;
>  	}
>
> +	if (n > 0)
> +		rte_mempool_put_bulk(txpool, (void **)free_list, n);
> +
>  	/* buffers were freed, update counters */
>  	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
>  	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
>
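For the record, with that NULL check folded in, the loop from your patch
would look roughly like this (again an untested sketch, reusing the names
from your patch):

	for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
		struct rte_mbuf *m;

		m = __rte_pktmbuf_prefree_seg(txep->mbuf);
		txep->mbuf = NULL;

		/* still referenced: must not go back to the pool,
		 * and m->pool below must not be dereferenced */
		if (m == NULL)
			continue;

		if (n >= TX_FREE_BULK ||
		    (n > 0 && m->pool != txpool)) {
			rte_mempool_put_bulk(txpool, (void **)free_list, n);
			n = 0;
		}

		txpool = m->pool;
		free_list[n++] = m;
	}

That way rte_mempool_put_bulk() only ever sees mbufs whose refcount
actually dropped to zero.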