From: Harry van Haaren <harry.van.haa...@intel.com> This commit adds AVX512 implementations of miniflow extract. By using the 64 bytes available in an AVX512 register, it is possible to convert a packet to a miniflow data-structure in a small number of instructions.
The implementation here probes for Ether()/IP()/UDP() traffic, and builds the appropriate miniflow data-structure for packets that match the probe. The implementation here is auto-validated by the miniflow extract autovalidator, hence its correctness can be easily tested and verified. Note that this commit is designed to easily allow addition of new traffic profiles in a scalable way, without code duplication for each traffic profile. Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> Acked-by: Eelco Chaudron <echau...@redhat.com> --- v9: - include comments from flavio v8: - include documentation on AVX512 MFEX as per Eelco's suggestion v7: - fix minor review sentences (Eelco) v5: - fix review comments(Ian, Flavio, Eelco) - inlcude assert for flow abi change - include assert for offset changes --- --- lib/automake.mk | 1 + lib/dpif-netdev-extract-avx512.c | 478 ++++++++++++++++++++++++++++++ lib/dpif-netdev-private-extract.c | 13 + lib/dpif-netdev-private-extract.h | 30 ++ 4 files changed, 522 insertions(+) create mode 100644 lib/dpif-netdev-extract-avx512.c diff --git a/lib/automake.mk b/lib/automake.mk index f4f36325e..299f81939 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -39,6 +39,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \ $(AM_CFLAGS) lib_libopenvswitchavx512_la_SOURCES = \ lib/dpif-netdev-lookup-avx512-gather.c \ + lib/dpif-netdev-extract-avx512.c \ lib/dpif-netdev-avx512.c lib_libopenvswitchavx512_la_LDFLAGS = \ -static diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c new file mode 100644 index 000000000..bb0e7facc --- /dev/null +++ b/lib/dpif-netdev-extract-avx512.c @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2021 Intel. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * AVX512 Miniflow Extract. + * + * This file contains optimized implementations of miniflow_extract() + * for specific common traffic patterns. The optimizations allow for + * quick probing of a specific packet type, and if a match with a specific + * type is found, a shuffle like procedure builds up the required miniflow. + * + * Process + * --------- + * + * The procedure is to classify the packet based on the traffic type + * using predefined bit-masks and arrange the packet header data using shuffle + * instructions to a pre-defined place as required by the miniflow. + * This eliminates the if-else ladder to identify the packet data and add data + * as per protocol which is present. + */ + +#ifdef __x86_64__ +/* Sparse cannot handle the AVX512 instructions. */ +#if !defined(__CHECKER__) + +#include <config.h> +#include <errno.h> +#include <immintrin.h> +#include <stdint.h> +#include <string.h> + +#include "flow.h" +#include "dpdk.h" + +#include "dpif-netdev-private-dpcls.h" +#include "dpif-netdev-private-extract.h" +#include "dpif-netdev-private-flow.h" + +/* AVX512-BW level permutex2var_epi8 emulation. */ +static inline __m512i +__attribute__((target("avx512bw"))) +_mm512_maskz_permutex2var_epi8_skx(__mmask64 k_mask, + __m512i v_data_0, + __m512i v_shuf_idxs, + __m512i v_data_1) +{ + /* Manipulate shuffle indexes for u16 size. */ + __mmask64 k_mask_odd_lanes = 0xAAAAAAAAAAAAAAAA; + /* Clear away ODD lane bytes. Cannot be done above due to no u8 shift. 
*/ + __m512i v_shuf_idx_evn = _mm512_mask_blend_epi8(k_mask_odd_lanes, + v_shuf_idxs, + _mm512_setzero_si512()); + v_shuf_idx_evn = _mm512_srli_epi16(v_shuf_idx_evn, 1); + + __m512i v_shuf_idx_odd = _mm512_srli_epi16(v_shuf_idxs, 9); + + /* Shuffle each half at 16-bit width. */ + __m512i v_shuf1 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_evn, + v_data_1); + __m512i v_shuf2 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_odd, + v_data_1); + + /* Find if the shuffle index was odd, via mask and compare. */ + uint16_t index_odd_mask = 0x1; + const __m512i v_index_mask_u16 = _mm512_set1_epi16(index_odd_mask); + + /* EVEN lanes, find if u8 index was odd, result as u16 bitmask. */ + __m512i v_idx_even_masked = _mm512_and_si512(v_shuf_idxs, + v_index_mask_u16); + __mmask32 evn_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_even_masked, + v_index_mask_u16); + + /* ODD lanes, find if u8 index was odd, result as u16 bitmask. */ + __m512i v_shuf_idx_srli8 = _mm512_srli_epi16(v_shuf_idxs, 8); + __m512i v_idx_odd_masked = _mm512_and_si512(v_shuf_idx_srli8, + v_index_mask_u16); + __mmask32 odd_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_odd_masked, + v_index_mask_u16); + odd_rotate_mask = ~odd_rotate_mask; + + /* Rotate and blend results from each index. */ + __m512i v_shuf_res_evn = _mm512_mask_srli_epi16(v_shuf1, evn_rotate_mask, + v_shuf1, 8); + __m512i v_shuf_res_odd = _mm512_mask_slli_epi16(v_shuf2, odd_rotate_mask, + v_shuf2, 8); + + /* If shuffle index was odd, blend shifted version. */ + __m512i v_shuf_result = _mm512_mask_blend_epi8(k_mask_odd_lanes, + v_shuf_res_evn, v_shuf_res_odd); + + __m512i v_zeros = _mm512_setzero_si512(); + __m512i v_result_kmskd = _mm512_mask_blend_epi8(k_mask, v_zeros, + v_shuf_result); + + return v_result_kmskd; +} + +/* Wrapper function required to enable ISA. 
*/ +static inline __m512i +__attribute__((__target__("avx512vbmi"))) +_mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) +{ + return _mm512_maskz_permutexvar_epi8(kmask, idx, a); +} + + +/* This file contains optimized implementations of miniflow_extract() + * for specific common traffic patterns. The optimizations allow for + * quick probing of a specific packet type, and if a match with a specific + * type is found, a shuffle like procedure builds up the required miniflow. + * + * The functionality here can be easily auto-validated and tested against the + * scalar miniflow_extract() function. As such, manual review of the code by + * the community (although welcome) is not required. Confidence in the + * correctness of the code can be confirmed from the autovalidator results. + */ + +/* Generator for EtherType masks and values. */ +#define PATTERN_ETHERTYPE_GEN(type_b0, type_b1) \ + 0, 0, 0, 0, 0, 0, /* Ether MAC DST */ \ + 0, 0, 0, 0, 0, 0, /* Ether MAC SRC */ \ + type_b0, type_b1, /* EtherType */ + +#define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF) +#define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00) + +/* Generator for checking IPv4 ver, ihl, and proto */ +#define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \ + VER_IHL, /* Version and IHL */ \ + 0, 0, 0, /* DSCP, ECN, Total Length */ \ + 0, 0, /* Identification */ \ + /* Flags/Fragment offset: don't match MoreFrag (MF) or FragOffset */ \ + FLAG_OFF_B0, FLAG_OFF_B1, \ + 0, /* TTL */ \ + PROTO, /* Protocol */ \ + 0, 0, /* Header checksum */ \ + 0, 0, 0, 0, /* Src IP */ \ + 0, 0, 0, 0, /* Dst IP */ + +#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF) +#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11) +#define PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06) + +#define NU 0 +#define PATTERN_IPV4_UDP_SHUFFLE \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \ + 26, 27, 28, 29, 30, 31, 32, 33, NU, 
NU, NU, NU, 20, 15, 22, 23, /* IPv4 */ \ + 34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */ \ + NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */ + + +/* Generation of K-mask bitmask values, to zero out data in result. Note that + * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be + * set in this K-mask, and "NU" values must be zero in the k-mask. Each mask + * defined here represents 2 blocks, so 16 bytes, so 4 characters (eg. 0xFFFF). + * + * Note the ULL suffix allows shifting by 32 or more without integer overflow. + */ +#define KMASK_ETHER 0x1FFFULL +#define KMASK_IPV4 0xF0FFULL +#define KMASK_UDP 0x000FULL + +#define PATTERN_IPV4_UDP_KMASK \ + (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32)) + + +/* This union allows initializing static data as u8, but easily loading it + * into AVX512 registers too. The union ensures proper alignment for the zmm. + */ +union mfex_data { + uint8_t u8_data[64]; + __m512i zmm; +}; + +/* This structure represents a single traffic pattern. The AVX512 code to + * enable the specifics for each pattern is largely the same, so it is + * specialized to use the common profile data from here. + * + * Due to the nature of e.g. TCP flag handling, or VLAN CFI bit setting, + * some profiles require additional processing. This is handled by having + * all implementations call a post-process function, and specializing away + * the big switch() that handles all traffic types. + * + * This approach reduces AVX512 code-duplication for each traffic type. + */ +struct mfex_profile { + /* Required for probing a packet with the mfex pattern. */ + union mfex_data probe_mask; + union mfex_data probe_data; + + /* Required for reshaping packet into miniflow. */ + union mfex_data store_shuf; + __mmask64 store_kmsk; + + /* Constant data to set in mf.bits and dp_packet data on hit. 
*/ + uint64_t mf_bits[FLOWMAP_UNITS]; + uint16_t dp_pkt_offs[4]; + uint16_t dp_pkt_min_size; +}; + +/* Ensure dp_pkt_offs[4] is the correct size as in struct dp_packet. */ +BUILD_ASSERT_DECL((OFFSETOFEND(struct dp_packet, l4_ofs) + - offsetof(struct dp_packet, l2_pad_size)) == + MEMBER_SIZEOF(struct mfex_profile, dp_pkt_offs)); + +/* Ensure FLOWMAP_UNITS is 2 units, as the implementation assumes this. */ +BUILD_ASSERT_DECL(FLOWMAP_UNITS == 2); + +/* Ensure the miniflow-struct ABI is the expected version. */ +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); + +/* If the above build assert happens, this means that you might need to make + * some modifications to the AVX512 miniflow extractor code. In general, the + * AVX512 flow extractor code uses hardcoded miniflow->map->bits which are + * defined into the mfex_profile structure as mf_bits. In addition to the + * hardcoded bits, it also has hardcoded offsets/masks that tell the AVX512 + * code how to translate packet data in the required miniflow values. These + * are stored in the mfex_profile structure as store_shuf and store_kmsk. + * See the respective documentation on their usage. + * + * If you have made changes to the flow structure, but only additions, no + * re-arranging of the actual members, you might be good to go. To be 100% + * sure, if possible, run the AVX512 MFEX autovalidator tests on an AVX512 + * enabled machine. + * + * If you did make changes to the order, you have to run the autovalidator + * tests on an AVX512 machine, and in the case of errors, the debug output + * will show what miniflow or dp_packet properties are not being correctly + * built from the input packet. + * + * In case your change increased the maximum size of the map, i.e., + * FLOWMAP_UNITS, you need to study the code as it will need some rewriting. + * + * If you are not using the AVX512 MFEX implementation at all, i.e. 
keeping it + * to the default scalar implementation, see "ovs-appctl + * dpif-netdev/miniflow-parser-get", you could ignore this assert, and just + * just increase the FLOW_WC_SEQ number in the assert. + */ + +enum MFEX_PROFILES { + PROFILE_ETH_IPV4_UDP, + PROFILE_COUNT, +}; + +/* Static const instances of profiles. These are compile-time constants, + * and are specialized into individual miniflow-extract functions. + * NOTE: Order of the fields is significant, any change in the order must be + * reflected in miniflow_extract()! + */ +static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = +{ + [PROFILE_ETH_IPV4_UDP] = { + .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK }, + .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP}, + + .store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE }, + .store_kmsk = PATTERN_IPV4_UDP_KMASK, + + .mf_bits = { 0x18a0000000000000, 0x0000000000040401}, + .dp_pkt_offs = { + 0, UINT16_MAX, 14, 34, + }, + .dp_pkt_min_size = 42, + }, +}; + + +/* Protocol specific helper functions, for calculating offsets/lenghts. */ +static int32_t +mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh, + uint32_t len_from_ipv4) +{ + /* Handle dynamic l2_pad_size. */ + uint16_t tot_len = ntohs(nh->ip_tot_len); + if (OVS_UNLIKELY(tot_len > len_from_ipv4 || + (len_from_ipv4 - tot_len) > UINT16_MAX)) { + return -1; + } + dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len); + return 0; +} + +/* Generic loop to process any mfex profile. This code is specialized into + * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE + * to ensure the compiler specializes each instance. The code is marked "hot" + * to inform the compiler this is a hotspot in the program, encouraging + * inlining of callee functions such as the permute calls. 
+ */ +static inline uint32_t ALWAYS_INLINE +__attribute__ ((hot)) +mfex_avx512_process(struct dp_packet_batch *packets, + struct netdev_flow_key *keys, + uint32_t keys_size OVS_UNUSED, + odp_port_t in_port, + void *pmd_handle OVS_UNUSED, + const enum MFEX_PROFILES profile_id, + const uint32_t use_vbmi) +{ + uint32_t hitmask = 0; + struct dp_packet *packet; + + /* Here the profile to use is chosen by the variable used to specialize + * the function. This causes different MFEX traffic to be handled. + */ + const struct mfex_profile *profile = &mfex_profiles[profile_id]; + + /* Load profile constant data. */ + __m512i v_vals = _mm512_loadu_si512(&profile->probe_data); + __m512i v_mask = _mm512_loadu_si512(&profile->probe_mask); + __m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf); + + __mmask64 k_shuf = profile->store_kmsk; + __m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits); + uint16_t dp_pkt_min_size = profile->dp_pkt_min_size; + + __m128i v_zeros = _mm_setzero_si128(); + __m128i v_blocks01 = _mm_insert_epi32(v_zeros, odp_to_u32(in_port), 1); + + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { + /* If the packet is smaller than the probe size, skip it. */ + const uint32_t size = dp_packet_size(packet); + if (size < dp_pkt_min_size) { + continue; + } + + /* Load packet data and probe with AVX512 mask & compare. */ + const uint8_t *pkt = dp_packet_data(packet); + __m512i v_pkt0 = _mm512_loadu_si512(pkt); + __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask); + __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_vals); + if (k_cmp != UINT64_MAX) { + continue; + } + + /* Copy known dp packet offsets to the dp_packet instance. */ + memcpy(&packet->l2_pad_size, &profile->dp_pkt_offs, + sizeof(uint16_t) * 4); + + /* Store known miniflow bits and first two blocks. 
*/ + struct miniflow *mf = &keys[i].mf; + uint64_t *bits = (void *) &mf->map.bits[0]; + uint64_t *blocks = miniflow_values(mf); + _mm_storeu_si128((void *) bits, v_bits); + _mm_storeu_si128((void *) blocks, v_blocks01); + + /* Permute the packet layout into miniflow blocks shape. + * As different AVX512 ISA levels have different implementations, + * this specializes on the "use_vbmi" attribute passed in. + */ + __m512i v512_zeros = _mm512_setzero_si512(); + __m512i v_blk0 = v512_zeros; + if (__builtin_constant_p(use_vbmi) && use_vbmi) { + v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf, + v_pkt0); + } else { + v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, + v_shuf, v512_zeros); + } + _mm512_storeu_si512(&blocks[2], v_blk0); + + + /* Perform "post-processing" per profile, handling details not easily + * handled in the above generic AVX512 code. Examples include TCP flag + * parsing, adding the VLAN CFI bit, and handling IPv4 fragments. + */ + switch (profile_id) { + case PROFILE_COUNT: + ovs_assert(0); /* avoid compiler warning on missing ENUM */ + break; + + case PROFILE_ETH_IPV4_UDP: { + /* Handle dynamic l2_pad_size. */ + uint32_t size_from_ipv4 = size - sizeof(struct eth_header); + struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + continue; + } + + } break; + default: + break; + }; + + /* This packet has its miniflow created, add to hitmask. 
*/ + hitmask |= 1 << i; + } + + return hitmask; +} + + +#define DECLARE_MFEX_FUNC(name, profile) \ +uint32_t \ +__attribute__((__target__("avx512f"))) \ +__attribute__((__target__("avx512vl"))) \ +__attribute__((__target__("avx512vbmi"))) \ +mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, \ + struct netdev_flow_key *keys, uint32_t keys_size,\ + odp_port_t in_port, struct dp_netdev_pmd_thread \ + *pmd_handle) \ +{ \ + return mfex_avx512_process(packets, keys, keys_size, in_port, \ + pmd_handle, profile, 1); \ +} \ + \ +uint32_t \ +__attribute__((__target__("avx512f"))) \ +__attribute__((__target__("avx512vl"))) \ +mfex_avx512_##name(struct dp_packet_batch *packets, \ + struct netdev_flow_key *keys, uint32_t keys_size, \ + odp_port_t in_port, struct dp_netdev_pmd_thread \ + *pmd_handle) \ +{ \ + return mfex_avx512_process(packets, keys, keys_size, in_port, \ + pmd_handle, profile, 0); \ +} + +/* Each profile gets a single declare here, which specializes the function + * as required. + */ +DECLARE_MFEX_FUNC(ip_udp, PROFILE_ETH_IPV4_UDP) + + +static int32_t +avx512_isa_probe(uint32_t needs_vbmi) +{ + static const char *isa_required[] = { + "avx512f", + "avx512bw", + "bmi2", + }; + + int32_t ret = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { + if (!dpdk_get_cpu_has_isa("x86_64", isa_required[i])) { + ret = -ENOTSUP; + } + } + + if (needs_vbmi) { + if (!dpdk_get_cpu_has_isa("x86_64", "avx512vbmi")) { + ret = -ENOTSUP; + } + } + + return ret; +} + +/* Probe functions to check ISA requirements. 
*/ +int32_t +mfex_avx512_probe(void) +{ + const uint32_t needs_vbmi = 0; + return avx512_isa_probe(needs_vbmi); +} + +int32_t +mfex_avx512_vbmi_probe(void) +{ + const uint32_t needs_vbmi = 1; + return avx512_isa_probe(needs_vbmi); +} + +#endif /* __CHECKER__ */ +#endif /* __x86_64__ */ diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c index b4d538b92..d6461ec49 100644 --- a/lib/dpif-netdev-private-extract.c +++ b/lib/dpif-netdev-private-extract.c @@ -52,6 +52,19 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = { .probe = NULL, .extract_func = mfex_study_traffic, .name = "study", }, + +/* Compile in implementations only if the compiler ISA checks pass. */ +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) + [MFEX_IMPL_VMBI_IPv4_UDP] = { + .probe = mfex_avx512_vbmi_probe, + .extract_func = mfex_avx512_vbmi_ip_udp, + .name = "avx512_vbmi_ipv4_udp", }, + + [MFEX_IMPL_IPv4_UDP] = { + .probe = mfex_avx512_probe, + .extract_func = mfex_avx512_ip_udp, + .name = "avx512_ipv4_udp", }, +#endif }; BUILD_ASSERT_DECL(MFEX_IMPL_MAX == ARRAY_SIZE(mfex_impls)); diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h index b93fecbbc..e6660ce7d 100644 --- a/lib/dpif-netdev-private-extract.h +++ b/lib/dpif-netdev-private-extract.h @@ -81,6 +81,10 @@ enum dpif_miniflow_extract_impl_idx { MFEX_IMPL_AUTOVALIDATOR, MFEX_IMPL_SCALAR, MFEX_IMPL_STUDY, +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) + MFEX_IMPL_VMBI_IPv4_UDP, + MFEX_IMPL_IPv4_UDP, +#endif MFEX_IMPL_MAX }; @@ -89,9 +93,15 @@ extern struct ovs_mutex dp_netdev_mutex; /* Define a index which points to the first traffic optimized MFEX * option from the enum list else holds max value. */ +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) + +#define MFEX_IMPL_START_IDX MFEX_IMPL_VMBI_IPv4_UDP +#else #define MFEX_IMPL_START_IDX MFEX_IMPL_MAX +#endif + /* Max count of packets to be compared. 
*/ #define MFEX_MAX_PKT_COUNT (128) @@ -160,4 +170,24 @@ mfex_study_traffic(struct dp_packet_batch *packets, int mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name); +/* AVX512 MFEX Probe and Implementations functions. */ +#ifdef __x86_64__ +int32_t mfex_avx512_probe(void); +int32_t mfex_avx512_vbmi_probe(void); + +#define DECLARE_AVX512_MFEX_PROTOTYPE(name) \ + uint32_t \ + mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, \ + struct netdev_flow_key *keys, uint32_t keys_size,\ + odp_port_t in_port, struct dp_netdev_pmd_thread \ + *pmd_handle); \ + uint32_t \ + mfex_avx512_##name(struct dp_packet_batch *packets, \ + struct netdev_flow_key *keys, uint32_t keys_size, \ + odp_port_t in_port, struct dp_netdev_pmd_thread \ + *pmd_handle); \ + +DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp); +#endif /* __x86_64__ */ + #endif /* MFEX_AVX512_EXTRACT */ -- 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev