From: "Chen Jing D(Mark)" <jing.d.c...@intel.com> Add 2 functions, in which using SSE instructions to parse RX desc to get pkt_type and ol_flags in mbuf.
Signed-off-by: Chen Jing D(Mark) <jing.d.chen at intel.com> --- drivers/net/fm10k/fm10k_rxtx_vec.c | 127 ++++++++++++++++++++++++++++++++++++ 1 files changed, 127 insertions(+), 0 deletions(-) diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c index 75533f9..581a309 100644 --- a/drivers/net/fm10k/fm10k_rxtx_vec.c +++ b/drivers/net/fm10k/fm10k_rxtx_vec.c @@ -44,6 +44,133 @@ #pragma GCC diagnostic ignored "-Wcast-qual" #endif +/* Handling the offload flags (olflags) field takes computation + * time when receiving packets. Therefore we provide a flag to disable + * the processing of the olflags field when they are not needed. This + * gives improved performance, at the cost of losing the offload info + * in the received packet + */ +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE + +/* Vlan present flag shift */ +#define VP_SHIFT (2) +/* L3 type shift */ +#define L3TYPE_SHIFT (4) +/* L4 type shift */ +#define L4TYPE_SHIFT (7) + +static inline void +fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) +{ + __m128i ptype0, ptype1, vtag0, vtag1; + union { + uint16_t e[4]; + uint64_t dword; + } vol; + + const __m128i pkttype_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, + PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT); + + /* mask everything except rss type */ + const __m128i rsstype_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x000F, 0x000F, 0x000F, 0x000F); + + /* map rss type to rss hash flag */ + const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0); + + ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]); + ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); + vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]); + vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]); + + ptype0 = _mm_unpacklo_epi32(ptype0, ptype1); + ptype0 = _mm_and_si128(ptype0, rsstype_msk); + ptype0 = _mm_shuffle_epi8(rss_flags, ptype0); + + vtag1 = _mm_unpacklo_epi32(vtag0, vtag1); + vtag1 = _mm_srli_epi16(vtag1, VP_SHIFT); + vtag1 = _mm_and_si128(vtag1, pkttype_msk); + + vtag1 = _mm_or_si128(ptype0, vtag1); + vol.dword = _mm_cvtsi128_si64(vtag1); + + rx_pkts[0]->ol_flags = vol.e[0]; + rx_pkts[1]->ol_flags = vol.e[1]; + rx_pkts[2]->ol_flags = vol.e[2]; + rx_pkts[3]->ol_flags = vol.e[3]; +} + +static inline void +fm10k_desc_to_pktype_v(__m128i descs[4], struct rte_mbuf **rx_pkts) +{ + __m128i l3l4type0, l3l4type1, l3type, l4type; + union { + uint16_t e[4]; + uint64_t dword; + } vol; + + /* L3 pkt type mask Bit4 to Bit6 */ + const __m128i l3type_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0070, 0x0070, 0x0070, 0x0070); + + /* L4 pkt type mask Bit7 to Bit9 */ + const __m128i l4type_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0380, 0x0380, 0x0380, 0x0380); + + /* convert RRC l3 type to mbuf format */ + const __m128i l3type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, RTE_PTYPE_L3_IPV6_EXT, + RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV4_EXT, + RTE_PTYPE_L3_IPV4, 0); + + /* Convert RRC l4 type to mbuf format l4type_flags shift-left 8 bits + * to fill into8 bits length. + */ + const __m128i l4type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, + RTE_PTYPE_TUNNEL_GENEVE >> 8, + RTE_PTYPE_TUNNEL_NVGRE >> 8, + RTE_PTYPE_TUNNEL_VXLAN >> 8, + RTE_PTYPE_TUNNEL_GRE >> 8, + RTE_PTYPE_L4_UDP >> 8, + RTE_PTYPE_L4_TCP >> 8, + 0); + + l3l4type0 = _mm_unpacklo_epi16(descs[0], descs[1]); + l3l4type1 = _mm_unpacklo_epi16(descs[2], descs[3]); + l3l4type0 = _mm_unpacklo_epi32(l3l4type0, l3l4type1); + + l3type = _mm_and_si128(l3l4type0, l3type_msk); + l4type = _mm_and_si128(l3l4type0, l4type_msk); + + l3type = _mm_srli_epi16(l3type, L3TYPE_SHIFT); + l4type = _mm_srli_epi16(l4type, L4TYPE_SHIFT); + + l3type = _mm_shuffle_epi8(l3type_flags, l3type); + /* l4type_flags shift-left for 8 bits, need shift-right back */ + l4type = _mm_shuffle_epi8(l4type_flags, l4type); + + l4type = _mm_slli_epi16(l4type, 8); + l3l4type0 = _mm_or_si128(l3type, l4type); + vol.dword = _mm_cvtsi128_si64(l3l4type0); + + rx_pkts[0]->packet_type = vol.e[0]; + rx_pkts[1]->packet_type = vol.e[1]; + rx_pkts[2]->packet_type = vol.e[2]; + rx_pkts[3]->packet_type = vol.e[3]; +} +#else +#define fm10k_desc_to_olflags_v(desc, rx_pkts) do {} while (0) +#define fm10k_desc_to_pktype_v(desc, rx_pkts) do {} while (0) +#endif + int __attribute__((cold)) fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq) { -- 1.7.7.6