From: "Chen Jing D(Mark)" <jing.d.c...@intel.com> Add function fm10k_rxq_rearm to re-allocate mbuf for used desc in RX HW ring.
Signed-off-by: Chen Jing D(Mark) <jing.d.chen at intel.com> --- drivers/net/fm10k/fm10k.h | 9 ++++ drivers/net/fm10k/fm10k_ethdev.c | 3 + drivers/net/fm10k/fm10k_rxtx_vec.c | 90 ++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 0 deletions(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 362a2d0..5df7960 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -123,6 +123,12 @@ #define FM10K_VFTA_BIT(vlan_id) (1 << ((vlan_id) & 0x1F)) #define FM10K_VFTA_IDX(vlan_id) ((vlan_id) >> 5) +#define RTE_FM10K_RXQ_REARM_THRESH 32 +#define RTE_FM10K_VPMD_TX_BURST 32 +#define RTE_FM10K_MAX_RX_BURST RTE_FM10K_RXQ_REARM_THRESH +#define RTE_FM10K_TX_MAX_FREE_BUF_SZ 64 +#define RTE_FM10K_DESCS_PER_LOOP 4 + struct fm10k_macvlan_filter_info { uint16_t vlan_num; /* Total VLAN number */ uint16_t mac_num; /* Total mac number */ @@ -178,6 +184,9 @@ struct fm10k_rx_queue { volatile uint32_t *tail_ptr; uint16_t nb_desc; uint16_t queue_id; + /* Below 2 fields only valid in case vPMD is applied. */ + uint16_t rxrearm_nb; /* number of remaining to be re-armed */ + uint16_t rxrearm_start; /* the idx we start the re-arming from */ uint8_t port_id; uint8_t drop_en; uint8_t rx_deferred_start; /* don't start this queue in dev start. */ diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 363ef98..44c3d34 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -121,6 +121,9 @@ rx_queue_reset(struct fm10k_rx_queue *q) q->next_alloc = 0; q->next_trigger = q->alloc_thresh - 1; FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1); + q->rxrearm_start = 0; + q->rxrearm_nb = 0; + return 0; } diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c index 34b677b..75533f9 100644 --- a/drivers/net/fm10k/fm10k_rxtx_vec.c +++ b/drivers/net/fm10k/fm10k_rxtx_vec.c @@ -64,3 +64,93 @@ fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq) rxq->mbuf_initializer = *(uint64_t *)p; return 0; } + +static inline void +fm10k_rxq_rearm(struct fm10k_rx_queue *rxq) +{ + int i; + uint16_t rx_id; + volatile union fm10k_rx_desc *rxdp; + struct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf *mb0, *mb1; + __m128i head_off = _mm_set_epi64x( + RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1, + RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1); + __m128i dma_addr0, dma_addr1; + /* Rx buffer need to be aligned with 512 byte */ + const __m128i hba_msk = _mm_set_epi64x(0, + UINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1); + + rxdp = rxq->hw_ring + rxq->rxrearm_start; + + /* Pull 'n' more MBUFs into the software ring */ + if (rte_mempool_get_bulk(rxq->mp, + (void *)mb_alloc, + RTE_FM10K_RXQ_REARM_THRESH) < 0) { + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += + RTE_FM10K_RXQ_REARM_THRESH; + return; + } + + /* Initialize the mbufs in vector, process 2 mbufs in one loop */ + for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) { + __m128i vaddr0, vaddr1; + uintptr_t p0, p1; + + mb0 = mb_alloc[0]; + mb1 = mb_alloc[1]; + + /* Flush mbuf with pkt template. + * Data to be rearmed is 6 bytes long. + * Though, RX will overwrite ol_flags that are coming next + * anyway. So overwrite whole 8 bytes with one load: + * 6 bytes of rearm_data plus first 2 bytes of ol_flags. + */ + p0 = (uintptr_t)&mb0->rearm_data; + *(uint64_t *)p0 = rxq->mbuf_initializer; + p1 = (uintptr_t)&mb1->rearm_data; + *(uint64_t *)p1 = rxq->mbuf_initializer; + + /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ + vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr)); + vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr)); + + /* convert pa to dma_addr hdr/data */ + dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); + dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); + + /* add headroom to pa values */ + dma_addr0 = _mm_add_epi64(dma_addr0, head_off); + dma_addr1 = _mm_add_epi64(dma_addr1, head_off); + + /* Do 512 byte alignment to satisfy HW requirement, in the + * meanwhile, set Header Buffer Address to zero. + */ + dma_addr0 = _mm_and_si128(dma_addr0, hba_msk); + dma_addr1 = _mm_and_si128(dma_addr1, hba_msk); + + /* flush desc with pa dma_addr */ + _mm_store_si128((__m128i *)&rxdp++->q, dma_addr0); + _mm_store_si128((__m128i *)&rxdp++->q, dma_addr1); + + /* enforce 512B alignment on default Rx virtual addresses */ + mb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr + + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN) + - (char *)mb0->buf_addr); + mb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr + + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN) + - (char *)mb1->buf_addr); + } + + rxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH; + if (rxq->rxrearm_start >= rxq->nb_desc) + rxq->rxrearm_start = 0; + + rxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH; + + rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ? + (rxq->nb_desc - 1) : (rxq->rxrearm_start - 1)); + + /* Update the tail pointer on the NIC */ + FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id); +} -- 1.7.7.6