> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Monday, September 21, 2020 10:14 AM
> 
> To enhance the per-core performance, this patch adds some AVX512
> instructions to the data path to handle the TX descriptors.
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo...@intel.com>
> Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
> Signed-off-by: Leyi Rong <leyi.r...@intel.com>

[...]

> +static inline void
> +iavf_vtx(volatile struct iavf_tx_desc *txdp,
> +      struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
> +{
> +     const uint64_t hi_qw_tmpl = (IAVF_TX_DESC_DTYPE_DATA |
> +                     ((uint64_t)flags  << IAVF_TXD_QW1_CMD_SHIFT));
> +
> +     /* if unaligned on 32-bit boundary, do one to align */
> +     if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
> +             iavf_vtx1(txdp, *pkt, flags);
> +             nb_pkts--, txdp++, pkt++;
> +     }
> +
> +     /* do two at a time while possible, in bursts */

The loop below processes four descriptors per iteration, not two — please update the comment to say "do four at a time while possible, in bursts".

> +     for (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {
> +             __m512i desc4 =
> +                     _mm512_set_epi64
> +                             ((uint64_t)pkt[3]->data_len,
> +                              pkt[3]->buf_iova,
> +                              (uint64_t)pkt[2]->data_len,
> +                              pkt[2]->buf_iova,
> +                              (uint64_t)pkt[1]->data_len,
> +                              pkt[1]->buf_iova,
> +                              (uint64_t)pkt[0]->data_len,
> +                              pkt[0]->buf_iova);
> +             __m512i hi_qw_tmpl_4 = _mm512_set1_epi64(hi_qw_tmpl);
> +             __m512i data_off_4 =
> +                     _mm512_set_epi64
> +                             (0,
> +                              pkt[3]->data_off,
> +                              0,
> +                              pkt[2]->data_off,
> +                              0,
> +                              pkt[1]->data_off,
> +                              0,
> +                              pkt[0]->data_off);
> +
> +             desc4 = _mm512_mask_slli_epi64(desc4, IAVF_TX_LEN_MASK,
> desc4,
> +                                            IAVF_TXD_QW1_TX_BUF_SZ_SHIFT);
> +             desc4 = _mm512_mask_or_epi64(desc4, IAVF_TX_LEN_MASK,
> desc4,
> +                                          hi_qw_tmpl_4);
> +             desc4 = _mm512_mask_add_epi64(desc4, IAVF_TX_OFF_MASK,
> desc4,
> +                                           data_off_4);
> +             _mm512_storeu_si512((void *)txdp, desc4);
> +     }
> +
> +     /* do any last ones */
> +     while (nb_pkts) {
> +             iavf_vtx1(txdp, *pkt, flags);
> +             txdp++, pkt++, nb_pkts--;
> +     }
> +}

Reply via email to