On 17 Jun 2021, at 18:27, Kumar Amber wrote:
> From: Harry van Haaren <harry.van.haa...@intel.com> > > This commit adds 3 new traffic profile implementations to the > existing avx512 miniflow extract infrastructure. The profiles added are: > - Ether()/IP()/TCP() > - Ether()/Dot1Q()/IP()/UDP() > - Ether()/Dot1Q()/IP()/TCP() > > The design of the avx512 code here is for scalability to add more > traffic profiles, as well as enabling CPU ISA. Note that an implementation > is primarily adding static const data, which the compiler then specializes > away when the profile specific function is declared below. > > As a result, the code is relatively maintainable, and scalable for new > traffic profiles as well as new ISA, and does not lower performance > compared with manually written code for each profile/ISA. > > Note that confidence in the correctness of each implementation is > achieved through autovalidation, unit tests with known packets, and > fuzz tested packets. > > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > > --- > > Hi Readers, > > If you have a traffic profile you'd like to see accelerated using > avx512 code, please send me an email and we can collaborate on adding > support for it! > > Regards, -Harry > --- > lib/dpif-netdev-extract-avx512.c | 155 ++++++++++++++++++++++++++++++ > lib/dpif-netdev-private-extract.c | 31 ++++++ > lib/dpif-netdev-private-extract.h | 4 + > 3 files changed, 190 insertions(+) > > diff --git a/lib/dpif-netdev-extract-avx512.c > b/lib/dpif-netdev-extract-avx512.c > index 1145ac8a9..0e0f6e295 100644 > --- a/lib/dpif-netdev-extract-avx512.c > +++ b/lib/dpif-netdev-extract-avx512.c > @@ -117,6 +117,13 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, > __m512i idx, __m512i a) > > #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF) > #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00) > +#define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00) > + > +/* VLAN (Dot1Q) patterns and masks. */ > +#define PATTERN_DT1Q_MASK \ > + 0x00, 0x00, 0xFF, 0xFF, > +#define PATTERN_DT1Q_IPV4 \ > + 0x00, 0x00, 0x08, 0x00, > > /* Generator for checking IPv4 ver, ihl, and proto */ > #define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \ > @@ -142,6 +149,29 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, > __m512i idx, __m512i a) > 34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */ > \ > NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. > */ > > +/* TCP shuffle: tcp_ctl bits require mask/processing, not included here. */ > +#define PATTERN_IPV4_TCP_SHUFFLE \ > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether > */ \ > + 26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */ > \ > + NU, NU, NU, NU, NU, NU, NU, NU, 34, 35, 36, 37, NU, NU, NU, NU, /* TCP */ > \ > + NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. > */ > + > +#define PATTERN_DT1Q_IPV4_UDP_SHUFFLE > \ > + /* Ether (2 blocks): Note that *VLAN* type is written here. */ > \ > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 0, 0, > \ > + /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */ > \ > + 12, 13, 14, 15, 0, 0, 0, 0, > \ > + 30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27, /* IPv4 */ > \ > + 38, 39, 40, 41, NU, NU, NU, NU, /* UDP */ > + > +#define PATTERN_DT1Q_IPV4_TCP_SHUFFLE > \ > + /* Ether (2 blocks): Note that *VLAN* type is written here. */ > \ > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 0, 0, > \ > + /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */ > \ > + 12, 13, 14, 15, 0, 0, 0, 0, > \ > + 30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27, /* IPv4 */ > \ > + NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */ > \ > + NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */ > > /* Generation of K-mask bitmask values, to zero out data in result. Note that > * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be > @@ -151,12 +181,22 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, > __m512i idx, __m512i a) > * Note the ULL suffix allows shifting by 32 or more without integer > overflow. > */ > #define KMASK_ETHER 0x1FFFULL > +#define KMASK_DT1Q 0x000FULL This was messing me up, as this suggests this is a 16-byte mask, but this is only 8, so maybe we should indicate it by removing the two leading zeros? #define KMASK_DT1Q 0x0FULL > #define KMASK_IPV4 0xF0FFULL > #define KMASK_UDP 0x000FULL > +#define KMASK_TCP 0x0F00ULL > > #define PATTERN_IPV4_UDP_KMASK \ > (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32)) > > +#define PATTERN_IPV4_TCP_KMASK \ > + (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_TCP << 32)) > + > +#define PATTERN_DT1Q_IPV4_UDP_KMASK \ > + (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_UDP << > 40)) > + > +#define PATTERN_DT1Q_IPV4_TCP_KMASK \ > + (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << > 40)) > > /* This union allows initializing static data as u8, but easily loading it > * into AVX512 registers too. The union ensures proper alignment for the zmm. > @@ -194,6 +234,9 @@ struct mfex_profile { > > enum MFEX_PROFILES { > PROFILE_ETH_IPV4_UDP, > + PROFILE_ETH_IPV4_TCP, > + PROFILE_ETH_VLAN_IPV4_UDP, > + PROFILE_ETH_VLAN_IPV4_TCP, > PROFILE_COUNT, > }; > > @@ -215,6 +258,56 @@ static const struct mfex_profile > mfex_profiles[PROFILE_COUNT] = > }, > .dp_pkt_min_size = 42, > }, > + > + [PROFILE_ETH_IPV4_TCP] = { > + .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK }, > + .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP}, > + > + .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE }, > + .store_kmsk = PATTERN_IPV4_TCP_KMASK, > + > + .mf_bits = { 0x18a0000000000000, 0x0000000000044401}, > + .dp_pkt_offs = { > + 0, UINT16_MAX, 14, 34, > + }, > + .dp_pkt_min_size = 54, > + }, > + > + [PROFILE_ETH_VLAN_IPV4_UDP] = { > + .probe_mask.u8_data = { > + PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK > + }, > + .probe_data.u8_data = { > + PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_UDP > + }, > + > + .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE }, > + .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK, > + > + .mf_bits = { 0x38a0000000000000, 0x0000000000040401}, > + .dp_pkt_offs = { > + 14, UINT16_MAX, 18, 38, > + }, > + .dp_pkt_min_size = 46, > + }, > + > + [PROFILE_ETH_VLAN_IPV4_TCP] = { > + .probe_mask.u8_data = { > + PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK > + }, > + .probe_data.u8_data = { > + PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP > + }, > + > + .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE }, > + .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK, > + > + .mf_bits = { 0x38a0000000000000, 0x0000000000044401}, > + .dp_pkt_offs = { > + 14, UINT16_MAX, 18, 38, > + }, > + .dp_pkt_min_size = 46, > + }, > }; > > > @@ -233,6 +326,28 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct > ip_header *nh, > return 0; > } > > +/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this > function, > + * and storing the output CFI bit bitwise-OR-ed with the PCP to miniflow. > + */ > +static void > +mfex_vlan_pcp(const uint8_t vlan_pcp, uint64_t *block) > +{ > + /* Bitwise-OR in the CFI flag, keeping other data the same. */ > + uint8_t *cfi_byte = (uint8_t *) block; > + cfi_byte[2] = 0x10 | vlan_pcp; > +} > + > +/* Process TCP flags using known LE endian-ness as this is AVX512 code. */ > +#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32) > TCP_FLAGS_BE16(tcp_ctl)) > + Looks like the TCP_FLAGS_BE32() macro is not used in this code. > +static void > +mfex_handle_tcp_flags(const struct tcp_header *tcp, uint64_t *block) > +{ > + uint16_t ctl = (OVS_FORCE uint16_t) TCP_FLAGS_BE16(tcp->tcp_ctl); > + uint64_t ctl_u64 = ctl; > + *block = ctl_u64 << 32; > +} > + > /* Generic loop to process any mfex profile. This code is specialized into > * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE > * to ensure the compiler specializes each instance. The code is marked "hot" > @@ -321,6 +436,43 @@ mfex_avx512_process(struct dp_packet_batch *packets, > ovs_assert(0); /* avoid compiler warning on missing ENUM */ > break; > NIT: As we might continue to add variants, would a callback in the profile be cleaner? Not sure what arguments to pass? Just a thought… > + case PROFILE_ETH_VLAN_IPV4_TCP: { > + mfex_vlan_pcp(pkt[14], &keys[i].buf[4]); > + > + uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; > + struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; > + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { > + continue; > + } > + > + /* Process TCP flags, and store to blocks. */ > + const struct tcp_header *tcp = (void *)&pkt[38]; > + mfex_handle_tcp_flags(tcp, &blocks[7]); > + } break; > + > + case PROFILE_ETH_VLAN_IPV4_UDP: { > + mfex_vlan_pcp(pkt[14], &keys[i].buf[4]); > + > + uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; > + struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; > + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { > + continue; > + } > + } break; > + > + case PROFILE_ETH_IPV4_TCP: { > + /* Process TCP flags, and store to blocks. */ > + const struct tcp_header *tcp = (void *)&pkt[34]; > + mfex_handle_tcp_flags(tcp, &blocks[6]); > + > + /* Handle dynamic l2_pad_size. */ > + uint32_t size_from_ipv4 = size - sizeof(struct eth_header); > + struct ip_header *nh = (void *)&pkt[sizeof(struct > eth_header)]; > + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { > + continue; > + } > + } break; > + > case PROFILE_ETH_IPV4_UDP: { > /* Handle dynamic l2_pad_size. */ > uint32_t size_from_ipv4 = size - sizeof(struct eth_header); > @@ -370,6 +522,9 @@ mfex_avx512_##name(struct dp_packet_batch *packets, > \ > * as required. > */ > DECLARE_MFEX_FUNC(ip_udp,PROFILE_ETH_IPV4_UDP) > +DECLARE_MFEX_FUNC(ip_tcp,PROFILE_ETH_IPV4_TCP) > +DECLARE_MFEX_FUNC(dot1q_ip_udp,PROFILE_ETH_VLAN_IPV4_UDP) > +DECLARE_MFEX_FUNC(dot1q_ip_tcp,PROFILE_ETH_VLAN_IPV4_TCP) > > > static int32_t > diff --git a/lib/dpif-netdev-private-extract.c > b/lib/dpif-netdev-private-extract.c > index 106a83867..65072eb38 100644 > --- a/lib/dpif-netdev-private-extract.c > +++ b/lib/dpif-netdev-private-extract.c > @@ -60,6 +60,37 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = { > .extract_func = mfex_avx512_ip_udp, > .name = "avx512_ipv4_udp", > }, > + { > + .probe = mfex_avx512_vbmi_probe, > + .extract_func = mfex_avx512_vbmi_ip_tcp, > + .name = "avx512_vbmi_ipv4_tcp", > + }, > + { > + .probe = mfex_avx512_probe, > + .extract_func = mfex_avx512_ip_tcp, > + .name = "avx512_ipv4_tcp", > + }, > + > + { > + .probe = mfex_avx512_vbmi_probe, > + .extract_func = mfex_avx512_vbmi_dot1q_ip_udp, > + .name = "avx512_vbmi_dot1q_ipv4_udp", > + }, > + { > + .probe = mfex_avx512_probe, > + .extract_func = mfex_avx512_dot1q_ip_udp, > + .name = "avx512_dot1q_ipv4_udp", > + }, > + { > + .probe = mfex_avx512_vbmi_probe, > + .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp, > + .name = "avx512_vbmi_dot1q_ipv4_tcp", > + }, > + { > + .probe = mfex_avx512_probe, > + .extract_func = mfex_avx512_dot1q_ip_tcp, > + .name = "avx512_dot1q_ipv4_tcp", > + }, > #endif > }; > > diff --git a/lib/dpif-netdev-private-extract.h > b/lib/dpif-netdev-private-extract.h > index f32be202a..b9a59c5a0 100644 > --- a/lib/dpif-netdev-private-extract.h > +++ b/lib/dpif-netdev-private-extract.h > @@ -152,6 +152,10 @@ int32_t mfex_avx512_vbmi_probe(void); > odp_port_t in_port, void *pmd_handle); > > DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp); > +DECLARE_AVX512_MFEX_PROTOTYPE(ip_tcp); > +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_udp); > +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_tcp); > + > #endif /* __x86_64__ */ > > > -- > 2.25.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev