From: "Chen Jing D(Mark)" <jing.d.c...@intel.com>

Add function fm10k_rxq_rearm to re-allocate mbuf for used desc
in RX HW ring.

Signed-off-by: Chen Jing D(Mark) <jing.d.chen at intel.com>
---
 drivers/net/fm10k/fm10k.h          |    9 ++++
 drivers/net/fm10k/fm10k_ethdev.c   |    3 +
 drivers/net/fm10k/fm10k_rxtx_vec.c |   90 ++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 362a2d0..5df7960 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -123,6 +123,12 @@
 #define FM10K_VFTA_BIT(vlan_id)    (1 << ((vlan_id) & 0x1F))
 #define FM10K_VFTA_IDX(vlan_id)    ((vlan_id) >> 5)

+#define RTE_FM10K_RXQ_REARM_THRESH      32
+#define RTE_FM10K_VPMD_TX_BURST         32
+#define RTE_FM10K_MAX_RX_BURST          RTE_FM10K_RXQ_REARM_THRESH
+#define RTE_FM10K_TX_MAX_FREE_BUF_SZ    64
+#define RTE_FM10K_DESCS_PER_LOOP    4
+
 struct fm10k_macvlan_filter_info {
        uint16_t vlan_num;       /* Total VLAN number */
        uint16_t mac_num;        /* Total mac number */
@@ -178,6 +184,9 @@ struct fm10k_rx_queue {
        volatile uint32_t *tail_ptr;
        uint16_t nb_desc;
        uint16_t queue_id;
+       /* Below 2 fields only valid in case vPMD is applied. */
+       uint16_t rxrearm_nb;     /* number of remaining to be re-armed */
+       uint16_t rxrearm_start;  /* the idx we start the re-arming from */
        uint8_t port_id;
        uint8_t drop_en;
        uint8_t rx_deferred_start; /* don't start this queue in dev start. */
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 363ef98..44c3d34 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -121,6 +121,9 @@ rx_queue_reset(struct fm10k_rx_queue *q)
        q->next_alloc = 0;
        q->next_trigger = q->alloc_thresh - 1;
        FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
+       q->rxrearm_start = 0;
+       q->rxrearm_nb = 0;
+
        return 0;
 }

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c 
b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 34b677b..75533f9 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -64,3 +64,93 @@ fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)
        rxq->mbuf_initializer = *(uint64_t *)p;
        return 0;
 }
+
+static inline void
+fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
+{
+       int i;
+       uint16_t rx_id;
+       volatile union fm10k_rx_desc *rxdp;
+       struct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start];
+       struct rte_mbuf *mb0, *mb1;
+       __m128i head_off = _mm_set_epi64x(
+                       RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1,
+                       RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1);
+       __m128i dma_addr0, dma_addr1;
+       /* Rx buffer need to be aligned with 512 byte */
+       const __m128i hba_msk = _mm_set_epi64x(0,
+                               UINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1);
+
+       rxdp = rxq->hw_ring + rxq->rxrearm_start;
+
+       /* Pull 'n' more MBUFs into the software ring */
+       if (rte_mempool_get_bulk(rxq->mp,
+                                (void *)mb_alloc,
+                                RTE_FM10K_RXQ_REARM_THRESH) < 0) {
+               rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+                       RTE_FM10K_RXQ_REARM_THRESH;
+               return;
+       }
+
+       /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+       for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) {
+               __m128i vaddr0, vaddr1;
+               uintptr_t p0, p1;
+
+               mb0 = mb_alloc[0];
+               mb1 = mb_alloc[1];
+
+               /* Flush mbuf with pkt template.
+                * Data to be rearmed is 6 bytes long.
+                * Though, RX will overwrite ol_flags that are coming next
+                * anyway. So overwrite whole 8 bytes with one load:
+                * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
+                */
+               p0 = (uintptr_t)&mb0->rearm_data;
+               *(uint64_t *)p0 = rxq->mbuf_initializer;
+               p1 = (uintptr_t)&mb1->rearm_data;
+               *(uint64_t *)p1 = rxq->mbuf_initializer;
+
+               /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+               vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
+               vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
+
+               /* convert pa to dma_addr hdr/data */
+               dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+               dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+               /* add headroom to pa values */
+               dma_addr0 = _mm_add_epi64(dma_addr0, head_off);
+               dma_addr1 = _mm_add_epi64(dma_addr1, head_off);
+
+               /* Do 512 byte alignment to satisfy HW requirement, in the
+                * meanwhile, set Header Buffer Address to zero.
+                */
+               dma_addr0 = _mm_and_si128(dma_addr0, hba_msk);
+               dma_addr1 = _mm_and_si128(dma_addr1, hba_msk);
+
+               /* flush desc with pa dma_addr */
+               _mm_store_si128((__m128i *)&rxdp++->q, dma_addr0);
+               _mm_store_si128((__m128i *)&rxdp++->q, dma_addr1);
+
+               /* enforce 512B alignment on default Rx virtual addresses */
+               mb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr
+                               + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
+                               - (char *)mb0->buf_addr);
+               mb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr
+                               + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
+                               - (char *)mb1->buf_addr);
+       }
+
+       rxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH;
+       if (rxq->rxrearm_start >= rxq->nb_desc)
+               rxq->rxrearm_start = 0;
+
+       rxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH;
+
+       rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
+                            (rxq->nb_desc - 1) : (rxq->rxrearm_start - 1));
+
+       /* Update the tail pointer on the NIC */
+       FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id);
+}
-- 
1.7.7.6

Reply via email to