From: Harry van Haaren <harry.van.haa...@intel.com> This commit adds 3 new traffic profile implementations to the existing avx512 miniflow extract infrastructure. The profiles added are: - Ether()/IP()/TCP() - Ether()/Dot1Q()/IP()/UDP() - Ether()/Dot1Q()/IP()/TCP()
The avx512 code here is designed to scale to more traffic profiles, as well as to additional CPU ISAs. Note that an implementation is primarily adding static const data, which the compiler then specializes away when the profile-specific function is declared below. As a result, the code is relatively maintainable, and scalable for new traffic profiles as well as new ISA, and does not lower performance compared with manually written code for each profile/ISA. Note that confidence in the correctness of each implementation is achieved through autovalidation, unit tests with known packets, and fuzz-tested packets. Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> Acked-by: Eelco Chaudron <echau...@redhat.com> Acked-by: Flavio Leitner <f...@sysclose.org> --- Hi Readers, If you have a traffic profile you'd like to see accelerated using avx512 code, please send me an email and we can collaborate on adding support for it! Regards, -Harry --- v5: - fix review comments (Ian, Flavio, Eelco) --- --- NEWS | 2 + lib/dpif-netdev-extract-avx512.c | 152 ++++++++++++++++++++++++++++++ lib/dpif-netdev-private-extract.c | 30 ++++++ lib/dpif-netdev-private-extract.h | 10 ++ 4 files changed, 194 insertions(+) diff --git a/NEWS b/NEWS index 26cd85978..849008a80 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,8 @@ Post-v2.15.0 * Add build time configure command to enable auto-validatior as default miniflow implementation at build time. * Cache results for CPU ISA checks, reduces overhead on repeated lookups. + * Add AVX512 based optimized miniflow extract function for traffic type + IPv4/UDP, IPv4/TCP, Vlan/IPv4/UDP and Vlan/IPv4/TCP. - ovs-ctl: * New option '--no-record-hostname' to disable hostname configuration in ovsdb on startup. 
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c index c06e53582..ecb0be70d 100644 --- a/lib/dpif-netdev-extract-avx512.c +++ b/lib/dpif-netdev-extract-avx512.c @@ -136,6 +136,13 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF) #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00) +#define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00) + +/* VLAN (Dot1Q) patterns and masks. */ +#define PATTERN_DT1Q_MASK \ + 0x00, 0x00, 0xFF, 0xFF, +#define PATTERN_DT1Q_IPV4 \ + 0x00, 0x00, 0x08, 0x00, /* Generator for checking IPv4 ver, ihl, and proto */ #define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \ @@ -161,6 +168,29 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) 34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */ \ NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */ +/* TCP shuffle: tcp_ctl bits require mask/processing, not included here. */ +#define PATTERN_IPV4_TCP_SHUFFLE \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \ + 26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */ \ + NU, NU, NU, NU, NU, NU, NU, NU, 34, 35, 36, 37, NU, NU, NU, NU, /* TCP */ \ + NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */ + +#define PATTERN_DT1Q_IPV4_UDP_SHUFFLE \ + /* Ether (2 blocks): Note that *VLAN* type is written here. */ \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 0, 0, \ + /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */ \ + 12, 13, 14, 15, 0, 0, 0, 0, \ + 30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27, /* IPv4 */ \ + 38, 39, 40, 41, NU, NU, NU, NU, /* UDP */ + +#define PATTERN_DT1Q_IPV4_TCP_SHUFFLE \ + /* Ether (2 blocks): Note that *VLAN* type is written here. 
*/ \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 0, 0, \ + /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */ \ + 12, 13, 14, 15, 0, 0, 0, 0, \ + 30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27, /* IPv4 */ \ + NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */ \ + NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */ /* Generation of K-mask bitmask values, to zero out data in result. Note that * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be @@ -170,12 +200,22 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) * Note the ULL suffix allows shifting by 32 or more without integer overflow. */ #define KMASK_ETHER 0x1FFFULL +#define KMASK_DT1Q 0x0FULL #define KMASK_IPV4 0xF0FFULL #define KMASK_UDP 0x000FULL +#define KMASK_TCP 0x0F00ULL #define PATTERN_IPV4_UDP_KMASK \ (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32)) +#define PATTERN_IPV4_TCP_KMASK \ + (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_TCP << 32)) + +#define PATTERN_DT1Q_IPV4_UDP_KMASK \ + (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_UDP << 40)) + +#define PATTERN_DT1Q_IPV4_TCP_KMASK \ + (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40)) /* This union allows initializing static data as u8, but easily loading it * into AVX512 registers too. The union ensures proper alignment for the zmm. 
@@ -252,6 +292,9 @@ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); enum MFEX_PROFILES { PROFILE_ETH_IPV4_UDP, + PROFILE_ETH_IPV4_TCP, + PROFILE_ETH_VLAN_IPV4_UDP, + PROFILE_ETH_VLAN_IPV4_TCP, PROFILE_COUNT, }; @@ -275,6 +318,56 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = }, .dp_pkt_min_size = 42, }, + + [PROFILE_ETH_IPV4_TCP] = { + .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK }, + .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP}, + + .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE }, + .store_kmsk = PATTERN_IPV4_TCP_KMASK, + + .mf_bits = { 0x18a0000000000000, 0x0000000000044401}, + .dp_pkt_offs = { + 0, UINT16_MAX, 14, 34, + }, + .dp_pkt_min_size = 54, + }, + + [PROFILE_ETH_VLAN_IPV4_UDP] = { + .probe_mask.u8_data = { + PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK + }, + .probe_data.u8_data = { + PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_UDP + }, + + .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE }, + .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK, + + .mf_bits = { 0x38a0000000000000, 0x0000000000040401}, + .dp_pkt_offs = { + 14, UINT16_MAX, 18, 38, + }, + .dp_pkt_min_size = 46, + }, + + [PROFILE_ETH_VLAN_IPV4_TCP] = { + .probe_mask.u8_data = { + PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK + }, + .probe_data.u8_data = { + PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP + }, + + .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE }, + .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK, + + .mf_bits = { 0x38a0000000000000, 0x0000000000044401}, + .dp_pkt_offs = { + 14, UINT16_MAX, 18, 38, + }, + .dp_pkt_min_size = 58, /* L4 offset 38 + 20 byte TCP header. */ + }, }; @@ -293,6 +386,25 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh, return 0; } +/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function, + * and storing the output CFI bit bitwise-OR-ed with the PCP to miniflow. 
+ */ +static void +mfex_vlan_pcp(const uint8_t vlan_pcp, uint64_t *block) +{ + /* Bitwise-OR in the CFI flag, keeping other data the same. */ + uint8_t *cfi_byte = (uint8_t *) block; + cfi_byte[2] = 0x10 | vlan_pcp; +} + +static void +mfex_handle_tcp_flags(const struct tcp_header *tcp, uint64_t *block) +{ + uint16_t ctl = (OVS_FORCE uint16_t) TCP_FLAGS_BE16(tcp->tcp_ctl); + uint64_t ctl_u64 = ctl; + *block = ctl_u64 << 32; +} + /* Generic loop to process any mfex profile. This code is specialized into * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE * to ensure the compiler specializes each instance. The code is marked "hot" @@ -381,6 +493,43 @@ mfex_avx512_process(struct dp_packet_batch *packets, ovs_assert(0); /* avoid compiler warning on missing ENUM */ break; + case PROFILE_ETH_VLAN_IPV4_TCP: { + mfex_vlan_pcp(pkt[14], &keys[i].buf[4]); + + uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; + struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + continue; + } + + /* Process TCP flags, and store to blocks. */ + const struct tcp_header *tcp = (void *)&pkt[38]; + mfex_handle_tcp_flags(tcp, &blocks[7]); + } break; + + case PROFILE_ETH_VLAN_IPV4_UDP: { + mfex_vlan_pcp(pkt[14], &keys[i].buf[4]); + + uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; + struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + continue; + } + } break; + + case PROFILE_ETH_IPV4_TCP: { + /* Process TCP flags, and store to blocks. */ + const struct tcp_header *tcp = (void *)&pkt[34]; + mfex_handle_tcp_flags(tcp, &blocks[6]); + + /* Handle dynamic l2_pad_size. 
*/ + uint32_t size_from_ipv4 = size - sizeof(struct eth_header); + struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + continue; + } + } break; + case PROFILE_ETH_IPV4_UDP: { /* Handle dynamic l2_pad_size. */ uint32_t size_from_ipv4 = size - sizeof(struct eth_header); @@ -432,6 +581,9 @@ mfex_avx512_##name(struct dp_packet_batch *packets, \ * as required. */ DECLARE_MFEX_FUNC(ip_udp, PROFILE_ETH_IPV4_UDP) +DECLARE_MFEX_FUNC(ip_tcp, PROFILE_ETH_IPV4_TCP) +DECLARE_MFEX_FUNC(dot1q_ip_udp, PROFILE_ETH_VLAN_IPV4_UDP) +DECLARE_MFEX_FUNC(dot1q_ip_tcp, PROFILE_ETH_VLAN_IPV4_TCP) static int32_t diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c index fb9219ce2..8bd05df5b 100644 --- a/lib/dpif-netdev-private-extract.c +++ b/lib/dpif-netdev-private-extract.c @@ -64,6 +64,36 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = { .probe = mfex_avx512_probe, .extract_func = mfex_avx512_ip_udp, .name = "avx512_ipv4_udp", }, + + [MFEX_IMPL_VMBI_IPv4_TCP] = { + .probe = mfex_avx512_vbmi_probe, + .extract_func = mfex_avx512_vbmi_ip_tcp, + .name = "avx512_vbmi_ipv4_tcp", }, + + [MFEX_IMPL_IPv4_TCP] = { + .probe = mfex_avx512_probe, + .extract_func = mfex_avx512_ip_tcp, + .name = "avx512_ipv4_tcp", }, + + [MFEX_IMPL_VMBI_DOT1Q_IPv4_UDP] = { + .probe = mfex_avx512_vbmi_probe, + .extract_func = mfex_avx512_vbmi_dot1q_ip_udp, + .name = "avx512_vbmi_dot1q_ipv4_udp", }, + + [MFEX_IMPL_DOT1Q_IPv4_UDP] = { + .probe = mfex_avx512_probe, + .extract_func = mfex_avx512_dot1q_ip_udp, + .name = "avx512_dot1q_ipv4_udp", }, + + [MFEX_IMPL_VMBI_DOT1Q_IPv4_TCP] = { + .probe = mfex_avx512_vbmi_probe, + .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp, + .name = "avx512_vbmi_dot1q_ipv4_tcp", }, + + [MFEX_IMPL_DOT1Q_IPv4_TCP] = { + .probe = mfex_avx512_probe, + .extract_func = mfex_avx512_dot1q_ip_tcp, + .name = "avx512_dot1q_ipv4_tcp", }, #endif }; diff --git 
a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h index e6660ce7d..f9a757ba4 100644 --- a/lib/dpif-netdev-private-extract.h +++ b/lib/dpif-netdev-private-extract.h @@ -84,6 +84,12 @@ enum dpif_miniflow_extract_impl_idx { #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) MFEX_IMPL_VMBI_IPv4_UDP, MFEX_IMPL_IPv4_UDP, + MFEX_IMPL_VMBI_IPv4_TCP, + MFEX_IMPL_IPv4_TCP, + MFEX_IMPL_VMBI_DOT1Q_IPv4_UDP, + MFEX_IMPL_DOT1Q_IPv4_UDP, + MFEX_IMPL_VMBI_DOT1Q_IPv4_TCP, + MFEX_IMPL_DOT1Q_IPv4_TCP, #endif MFEX_IMPL_MAX }; @@ -188,6 +194,10 @@ int32_t mfex_avx512_vbmi_probe(void); *pmd_handle); \ DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp); +DECLARE_AVX512_MFEX_PROTOTYPE(ip_tcp); +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_udp); +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_tcp); + #endif /* __x86_64__ */ #endif /* MFEX_AVX512_EXTRACT */ -- 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev