To unify packet types among all PMDs, the packet type bit masks in ol_flags are replaced by the unified packet type carried in the mbuf. Note that a performance drop of around 2% (64B packets) was observed when doing IO forwarding over 4 ports (1 port per 82599 card) on the same SNB core.
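
As an illustration (not part of this patch), an application that used to test packet type bits in ol_flags would now read the enlarged packet_type field of the mbuf. The RTE_PTYPE_* names below are those introduced by the unified packet type definitions, and is_ipv4() is only a placeholder helper, not an API added by this series:

    #include <rte_mbuf.h>

    /* Illustrative only: checks whether an mbuf carries a plain IPv4 header. */
    static inline int
    is_ipv4(const struct rte_mbuf *m)
    {
        /* old style, removed by this series:
         *     return (m->ol_flags & PKT_RX_IPV4_HDR) != 0;
         */
        return (m->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4;
    }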
Signed-off-by: Cunming Liang <cunming.liang at intel.com>
Signed-off-by: Helin Zhang <helin.zhang at intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 49 +++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 23 deletions(-)

v2 changes:
* Used redefined packet types and enlarged packet_type field in mbuf.

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index b54cb19..357eb1d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -134,44 +134,35 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
  */
 #ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
 
-#define OLFLAGS_MASK ((uint16_t)(PKT_RX_VLAN_PKT | PKT_RX_IPV4_HDR |\
-			    PKT_RX_IPV4_HDR_EXT | PKT_RX_IPV6_HDR |\
-			    PKT_RX_IPV6_HDR_EXT))
-#define OLFLAGS_MASK_V   (((uint64_t)OLFLAGS_MASK << 48) | \
-			  ((uint64_t)OLFLAGS_MASK << 32) | \
-			  ((uint64_t)OLFLAGS_MASK << 16) | \
-			  ((uint64_t)OLFLAGS_MASK))
-#define PTYPE_SHIFT    (1)
+#define OLFLAGS_MASK_V  (((uint64_t)PKT_RX_VLAN_PKT << 48) | \
+			((uint64_t)PKT_RX_VLAN_PKT << 32) | \
+			((uint64_t)PKT_RX_VLAN_PKT << 16) | \
+			((uint64_t)PKT_RX_VLAN_PKT))
 #define VTAG_SHIFT     (3)
 
 static inline void
 desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 {
-	__m128i ptype0, ptype1, vtag0, vtag1;
+	__m128i vtag0, vtag1;
 	union {
 		uint16_t e[4];
 		uint64_t dword;
 	} vol;
 
-	ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
-	ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
 	vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
 	vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);
 
-	ptype1 = _mm_unpacklo_epi32(ptype0, ptype1);
 	vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
-
-	ptype1 = _mm_slli_epi16(ptype1, PTYPE_SHIFT);
 	vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
 
-	ptype1 = _mm_or_si128(ptype1, vtag1);
-	vol.dword = _mm_cvtsi128_si64(ptype1) & OLFLAGS_MASK_V;
+	vol.dword = _mm_cvtsi128_si64(vtag1) & OLFLAGS_MASK_V;
 
 	rx_pkts[0]->ol_flags = vol.e[0];
 	rx_pkts[1]->ol_flags = vol.e[1];
 	rx_pkts[2]->ol_flags = vol.e[2];
 	rx_pkts[3]->ol_flags = vol.e[3];
 }
+
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
 #endif
@@ -197,13 +188,15 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	uint64_t var;
 	__m128i shuf_msk;
 	__m128i crc_adjust = _mm_set_epi16(
-				0, 0, 0, 0, /* ignore non-length fields */
+				0, 0, 0,       /* ignore non-length fields */
+				-rxq->crc_len, /* sub crc on data_len */
 				0,             /* ignore high-16bits of pkt_len */
 				-rxq->crc_len, /* sub crc on pkt_len */
-				-rxq->crc_len, /* sub crc on data_len */
-				0            /* ignore pkt_type field */
+				0, 0           /* ignore pkt_type field */
 			);
 	__m128i dd_check, eop_check;
+	__m128i desc_mask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF,
+					  0xFFFFFFFF, 0xFFFF07F0);
 
 	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
 		return 0;
@@ -234,12 +227,13 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	/* mask to shuffle from desc. to mbuf */
 	shuf_msk = _mm_set_epi8(
 		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
-		0xFF, 0xFF,  /* skip high 16 bits vlan_macip, zero out */
 		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
+		13, 12,      /* octet 12~13, 16 bits data_len */
 		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
 		13, 12,      /* octet 12~13, low 16 bits pkt_len */
-		13, 12,      /* octet 12~13, 16 bits data_len */
-		0xFF, 0xFF   /* skip pkt_type field */
+		0xFF, 0xFF,  /* skip high 16 bits pkt_type */
+		1,           /* octet 1, 8 bits pkt_type field */
+		0            /* octet 0, 4 bits offset 4 pkt_type field */
 		);
 
 	/* Cache is empty -> need to scan the buffer rings, but first move
@@ -248,6 +242,7 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 	/*
 	 * A. load 4 packet in one loop
+	 * [A*. mask out 4 unused dirty field in desc]
 	 * B. copy 4 mbuf point from swring to rx_pkts
 	 * C. calc the number of DD bits among the 4 packets
 	 * [C*. extract the end-of-packet bit, if requested]
@@ -289,6 +284,14 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 
 		/* B.2 copy 2 mbuf point into rx_pkts  */
 		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
+		/* A* mask out 0~3 bits RSS type */
+		descs[3] = _mm_and_si128(descs[3], desc_mask);
+		descs[2] = _mm_and_si128(descs[2], desc_mask);
+
+		/* A* mask out 0~3 bits RSS type */
+		descs[1] = _mm_and_si128(descs[1], desc_mask);
+		descs[0] = _mm_and_si128(descs[0], desc_mask);
+
 		/* avoid compiler reorder optimization */
 		rte_compiler_barrier();
 
@@ -301,7 +304,7 @@ _recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
 
-		/* set ol_flags with packet type and vlan tag */
+		/* set ol_flags with vlan packet type */
 		desc_to_olflags_v(descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-- 
1.9.3