31/08/2023 18:24, Konstantin Ananyev пишет:



Define specific function implementation for i40e driver.
Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
And can be enabled both in fast free and no fast free mode.

Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
---
  drivers/net/i40e/i40e_ethdev.c                |   1 +
  drivers/net/i40e/i40e_ethdev.h                |   2 +
  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
++++++++++++++++++
  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
  drivers/net/i40e/i40e_rxtx.h                  |   4 +
  drivers/net/i40e/meson.build                  |   1 +
  6 files changed, 187 insertions(+)
  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 8271bbb394..50ba9aac94 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
        .flow_ops_get                 = i40e_dev_flow_ops_get,
        .rxq_info_get                 = i40e_rxq_info_get,
        .txq_info_get                 = i40e_txq_info_get,
+       .recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
        .rx_burst_mode_get            = i40e_rx_burst_mode_get,
        .tx_burst_mode_get            = i40e_tx_burst_mode_get,
        .timesync_enable              = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 6f65d5e0ac..af758798e1 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo);
  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo);
+void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_recycle_rxq_info *recycle_rxq_info);
  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
                           struct rte_eth_burst_mode *mode);  int
i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --
git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
new file mode 100644
index 0000000000..5663ecccde
--- /dev/null
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Arm Limited.
+ */
+
+#include <stdint.h>
+#include <ethdev_driver.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
+nb_mbufs) {
+       struct i40e_rx_queue *rxq = rx_queue;
+       struct i40e_rx_entry *rxep;
+       volatile union i40e_rx_desc *rxdp;
+       uint16_t rx_id;
+       uint64_t paddr;
+       uint64_t dma_addr;
+       uint16_t i;
+
+       rxdp = rxq->rx_ring + rxq->rxrearm_start;
+       rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+       for (i = 0; i < nb_mbufs; i++) {
+               /* Initialize rxdp descs. */
+               paddr = (rxep[i].mbuf)->buf_iova +
RTE_PKTMBUF_HEADROOM;
+               dma_addr = rte_cpu_to_le_64(paddr);
+               /* flush desc with pa dma_addr */
+               rxdp[i].read.hdr_addr = 0;
+               rxdp[i].read.pkt_addr = dma_addr;
+       }
+
+       /* Update the descriptor initializer index */
+       rxq->rxrearm_start += nb_mbufs;
+       rx_id = rxq->rxrearm_start - 1;
+
+       if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+               rxq->rxrearm_start = 0;
+               rx_id = rxq->nb_rx_desc - 1;
+       }
+
+       rxq->rxrearm_nb -= nb_mbufs;
+
+       rte_io_wmb();
+       /* Update the tail pointer on the NIC */
+       I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
+
+uint16_t
+i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+       struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
+       struct i40e_tx_queue *txq = tx_queue;
+       struct i40e_tx_entry *txep;
+       struct rte_mbuf **rxep;
+       int i, n;
+       uint16_t nb_recycle_mbufs;
+       uint16_t avail = 0;
+       uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+       uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+       uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+       uint16_t refill_head = *recycle_rxq_info->refill_head;
+       uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+       /* Get available recycling Rx buffers. */
+       avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+       /* Check Tx free thresh and Rx available space. */
+       if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+               return 0;
+
+       /* check DD bits on threshold descriptor */
+       if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+
        rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+
        rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+               return 0;
+
+       n = txq->tx_rs_thresh;
+       nb_recycle_mbufs = n;
+
+       /* Mbufs recycle mode can only support no ring buffer wrapping
around.
+        * Two case for this:
+        *
+        * case 1: The refill head of Rx buffer ring needs to be aligned with
+        * mbuf ring size. In this case, the number of Tx freeing buffers
+        * should be equal to refill_requirement.
+        *
+        * case 2: The refill head of Rx ring buffer does not need to be aligned
+        * with mbuf ring size. In this case, the update of refill head can not
+        * exceed the Rx mbuf ring size.
+        */
+       if (refill_requirement != n ||
+               (!refill_requirement && (refill_head + n > mbuf_ring_size)))
+               return 0;
+
+       /* First buffer to free from S/W ring is at index
+        * tx_next_dd - (tx_rs_thresh-1).
+        */
+       txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+       rxep = recycle_rxq_info->mbuf_ring;
+       rxep += refill_head;
+
+       if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+               /* Avoid txq contains buffers from unexpected mempool. */
+               if (unlikely(recycle_rxq_info->mp
+                                       != txep[0].mbuf->pool))
+                       return 0;
+
+               /* Directly put mbufs from Tx to Rx. */
+               for (i = 0; i < n; i++)
+                       rxep[i] = txep[i].mbuf;
+       } else {
+               for (i = 0; i < n; i++) {
+                       rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+                       /* If Tx buffers are not the last reference or from
+                        * unexpected mempool, previous copied buffers are
+                        * considered as invalid.
+                        */
+                       if (unlikely((rxep[i] == NULL && refill_requirement) ||
[Konstantin]
Could you pls remind me why it is ok to have rxep[i]==NULL when
refill_requirement is not set?

If reill_requirement is not zero, it means each tx freed buffer must be valid 
and can be put into Rx
sw_ring. Then  the refill head of Rx buffer ring can be aligned with mbuf ring 
size. Briefly speaking
the number of Tx valid freed buffer must be equal to Rx refill_requirement. For 
example, i40e driver.

If reill_requirement is zero, it means that the refill head of Rx buffer ring 
does not need to be aligned
with mbuf ring size, thus if Tx have n valid freed buffers, we just need to put 
these n buffers into Rx sw-ring,
and not to be equal to the Rx setting rearm number. For example, mlx5 driver.

In conclusion, above difference is due to pmd drivers have different strategies 
to update their Rx rearm(refill) head.
For i40e driver, if rearm_head exceed 1024, it will be set as 0 due to  the 
number of each rearm is a fixed value by default.
For mlx5 driver. Its rearm_head can exceed 1024, and use mask to achieve real 
index. Thus its rearm number can be a different value.

Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this mbuf is 
not free yet and can't be reused.
Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
Or probably would be enough to skip that mbuf?
Might be something like that:

for (i = 0, j = 0; i < n; i++) {

        rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
        if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
                if (refill_requirement) {
                        nb_recycle_mbufs = 0;
                        break;
                }
        } else
                j++;
}

/* now j contains actual number of recycled mbufs */

?


After another thought, it might be easier and cleaner just to:
if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
        nb_recycle_mbufs = 0;

Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't recycle that mbuf.




+                                       recycle_rxq_info->mp != txep[i].mbuf-
pool))
+                               nb_recycle_mbufs = 0;
+               }
+               /* If Tx buffers are not the last reference or
+                * from unexpected mempool, all recycled buffers
+                * are put into mempool.
+                */
+               if (nb_recycle_mbufs == 0)
+                       for (i = 0; i < n; i++) {
+                               if (rxep[i] != NULL)
+                                       rte_mempool_put(rxep[i]->pool,
rxep[i]);
+                       }
+       }
+
+       /* Update counters for Tx. */
+       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+       txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+       if (txq->tx_next_dd >= txq->nb_tx_desc)
+               txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+       return nb_recycle_mbufs;
+}
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
b4f65b58fa..a9c9eb331c 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
uint16_t queue_id,
        qinfo->conf.offloads = txq->offloads;
  }

+void
+i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
+       struct i40e_rx_queue *rxq;
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+       rxq = dev->data->rx_queues[queue_id];
+
+       recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
+       recycle_rxq_info->mp = rxq->mp;
+       recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
+       recycle_rxq_info->receive_tail = &rxq->rx_tail;
+
+       if (ad->rx_vec_allowed) {
+               recycle_rxq_info->refill_requirement =
RTE_I40E_RXQ_REARM_THRESH;
+               recycle_rxq_info->refill_head = &rxq->rxrearm_start;
+       } else {
+               recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
+               recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
+       }
+}
+
  #ifdef RTE_ARCH_X86
  static inline bool
  get_avx_supported(bool request_avx512)
@@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                dev->rx_pkt_burst = ad->rx_use_avx2 ?
                                        i40e_recv_scattered_pkts_vec_avx2 :
                                        i40e_recv_scattered_pkts_vec;
+                               dev->recycle_rx_descriptors_refill =
+                                       i40e_recycle_rx_descriptors_refill_vec;
                        }
                } else {
                        if (ad->rx_use_avx512) {
@@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                dev->rx_pkt_burst = ad->rx_use_avx2 ?
                                        i40e_recv_pkts_vec_avx2 :
                                        i40e_recv_pkts_vec;
+                               dev->recycle_rx_descriptors_refill =
+                                       i40e_recycle_rx_descriptors_refill_vec;
                        }
                }
  #else /* RTE_ARCH_X86 */
+               dev->recycle_rx_descriptors_refill =
+i40e_recycle_rx_descriptors_refill_vec;
                if (dev->data->scattered_rx) {
                        PMD_INIT_LOG(DEBUG,
                                     "Using Vector Scattered Rx (port %d).", @@
-3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                                dev->tx_pkt_burst = ad->tx_use_avx2 ?
                                                    i40e_xmit_pkts_vec_avx2 :
                                                    i40e_xmit_pkts_vec;
+                               dev->recycle_tx_mbufs_reuse =
i40e_recycle_tx_mbufs_reuse_vec;
                        }
  #else /* RTE_ARCH_X86 */
                        PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
                                     dev->data->port_id);
                        dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+                       dev->recycle_tx_mbufs_reuse =
i40e_recycle_tx_mbufs_reuse_vec;
  #endif /* RTE_ARCH_X86 */
                } else {
                        PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+                       dev->recycle_tx_mbufs_reuse =
i40e_recycle_tx_mbufs_reuse_vec;
                }
                dev->tx_pkt_prepare = i40e_simple_prep_pkts;
        } else {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
a8686224e5..b191f23e1f 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
*rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
offset);  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);

+uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+               struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
+i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
+nb_mbufs);
+
  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                            uint16_t nb_pkts);
  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
8e53b87a65..3b1a233c84 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -34,6 +34,7 @@ sources = files(
          'i40e_tm.c',
          'i40e_hash.c',
          'i40e_vf_representor.c',
+       'i40e_recycle_mbufs_vec_common.c',
          'rte_pmd_i40e.c',
  )

--
2.25.1


Reply via email to