> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu > Sent: Monday, September 21, 2020 10:14 AM > > To enhance the per-core performance, this patch adds some AVX512 > instructions to the data path to handle the TX descriptors. > > Signed-off-by: Wenzhuo Lu <wenzhuo...@intel.com> > Signed-off-by: Bruce Richardson <bruce.richard...@intel.com> > Signed-off-by: Leyi Rong <leyi.r...@intel.com>
[...] > +static inline void > +iavf_vtx(volatile struct iavf_tx_desc *txdp, > + struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) > +{ > + const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA | > + ((uint64_t)flags << IAVF_TXD_QW1_CMD_SHIFT)); > + > + /* if unaligned on 32-bit boundary, do one to align */ > + if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) { > + iavf_vtx1(txdp, *pkt, flags); > + nb_pkts--, txdp++, pkt++; > + } > + > + /* do two at a time while possible, in bursts */ This comment says "two at a time", but the loop below advances by four (txdp += 4, pkt += 4, nb_pkts -= 4) and builds one 512-bit store covering four descriptors — please update the comment to say four. > + for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) { > + __m512i desc4 = > + _mm512_set_epi64 > + ((uint64_t)pkt[3]->data_len, > + pkt[3]->buf_iova, > + (uint64_t)pkt[2]->data_len, > + pkt[2]->buf_iova, > + (uint64_t)pkt[1]->data_len, > + pkt[1]->buf_iova, > + (uint64_t)pkt[0]->data_len, > + pkt[0]->buf_iova); > + __m512i hi_qw_tmpl_4 = _mm512_set1_epi64(hi_qw_tmpl); > + __m512i data_off_4 = > + _mm512_set_epi64 > + (0, > + pkt[3]->data_off, > + 0, > + pkt[2]->data_off, > + 0, > + pkt[1]->data_off, > + 0, > + pkt[0]->data_off); > + > + desc4 = _mm512_mask_slli_epi64(desc4, IAVF_TX_LEN_MASK, > desc4, > + IAVF_TXD_QW1_TX_BUF_SZ_SHIFT); > + desc4 = _mm512_mask_or_epi64(desc4, IAVF_TX_LEN_MASK, > desc4, > + hi_qw_tmpl_4); > + desc4 = _mm512_mask_add_epi64(desc4, IAVF_TX_OFF_MASK, > desc4, > + data_off_4); > + _mm512_storeu_si512((void *)txdp, desc4); > + } > + > + /* do any last ones */ > + while (nb_pkts) { > + iavf_vtx1(txdp, *pkt, flags); > + txdp++, pkt++, nb_pkts--; > + } > +}