Hi Ian, Please find my replies inline, and thanks again for the review.
BR Amber > -----Original Message----- > From: Stokes, Ian <ian.sto...@intel.com> > Sent: Tuesday, June 29, 2021 10:11 PM > To: Amber, Kumar <kumar.am...@intel.com>; d...@openvswitch.org > Cc: i.maxim...@ovn.org > Subject: RE: [ovs-dev] [v4 10/12] dpif-netdev/mfex: Add AVX512 based > optimized miniflow extract > > > From: Harry van Haaren <harry.van.haa...@intel.com> > > > > This commit adds AVX512 implementations of miniflow extract. > > By using the 64 bytes available in an AVX512 register, it is possible > > to convert a packet to a miniflow data-structure in a small quantity > > instructions. > > > > The implementation here probes for Ether()/IP()/UDP() traffic, and > > builds the appropriate miniflow data-structure for packets that match > > the probe. > > > > The implementation here is auto-validated by the miniflow extract > > autovalidator, hence its correctness can be easily tested and > > verified. > > > > Note that this commit is designed to easily allow addition of new > > traffic profiles in a scalable way, without code duplication for each > > traffic profile. > > > > Thanks Harry/Amber. > > Agree with what Flavio has proposed so far as well. A few more comments > inline below. > > Note: A few comments refer to Comment coding style, I haven't called out > every instance as there are quite a few, but would recommend giving the > comments in particularly a look over to ensure they meet standards. 
> > BR > Ian > > Signed-off-by: Harry van Haaren <harry.van.haa...@intel.com> > > --- > > lib/automake.mk | 1 + > > lib/dpif-netdev-extract-avx512.c | 416 > > ++++++++++++++++++++++++++++++ lib/dpif-netdev-private-extract.c | > > 15 ++ lib/dpif-netdev-private-extract.h | 19 ++ > > 4 files changed, 451 insertions(+) > > create mode 100644 lib/dpif-netdev-extract-avx512.c > > > > diff --git a/lib/automake.mk b/lib/automake.mk index > > 3080bb04a..2b95d6f92 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -39,6 +39,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \ > > $(AM_CFLAGS) > > lib_libopenvswitchavx512_la_SOURCES = \ > > lib/dpif-netdev-lookup-avx512-gather.c \ > > +lib/dpif-netdev-extract-avx512.c \ > > lib/dpif-netdev-avx512.c > > lib_libopenvswitchavx512_la_LDFLAGS = \ -static diff --git > > a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c > > new file mode 100644 > > index 000000000..1145ac8a9 > > --- /dev/null > > +++ b/lib/dpif-netdev-extract-avx512.c > > @@ -0,0 +1,416 @@ > > +/* > > + * Copyright (c) 2021 Intel. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, > > +software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. > > + * See the License for the specific language governing permissions > > + and > > + * limitations under the License. > > + */ > > + > > +#ifdef __x86_64__ > > +/* Sparse cannot handle the AVX512 instructions. 
*/ #if > > +!defined(__CHECKER__) > > + > > +#include <config.h> > > +#include <errno.h> > > +#include <immintrin.h> > > +#include <stdint.h> > > +#include <string.h> > > + > > +#include "flow.h" > > +#include "dpdk.h" > > + > > +#include "dpif-netdev-private-dpcls.h" > > +#include "dpif-netdev-private-extract.h" > > +#include "dpif-netdev-private-flow.h" > > + > > +/* AVX512-BW level permutex2var_epi8 emulation. */ static inline > > +__m512i > > +__attribute__((target("avx512bw"))) > > +_mm512_maskz_permutex2var_epi8_skx(__mmask64 k_mask, > > + __m512i v_data_0, > > + __m512i v_shuf_idxs, > > + __m512i v_data_1) { > > + /* Manipulate shuffle indexes for u16 size. */ > > + __mmask64 k_mask_odd_lanes = 0xAAAAAAAAAAAAAAAA; > > + /* clear away ODD lane bytes. Cannot be done above due to no u8 > > +shift */ > Coding standard for comments. Capitalize Clear and add period at end of > comment. > Fixed all in v5. > > + __m512i v_shuf_idx_evn = > _mm512_mask_blend_epi8(k_mask_odd_lanes, > > + v_shuf_idxs, _mm512_setzero_si512()); > Alignment of arguments above seems a bit odd. Can we align vertically > under k_mask_odd_lanes? > > > + v_shuf_idx_evn = _mm512_srli_epi16(v_shuf_idx_evn, 1); > > + > > + __m512i v_shuf_idx_odd = _mm512_srli_epi16(v_shuf_idxs, 9); > > + > > + /* Shuffle each half at 16-bit width */ > For the comment above and multiple comments below, please add period at > end of comment to keep with standard. > Fixed all below as well in V5. 
> > + __m512i v_shuf1 = _mm512_permutex2var_epi16(v_data_0, > > v_shuf_idx_evn, > > + v_data_1); > > + __m512i v_shuf2 = _mm512_permutex2var_epi16(v_data_0, > > v_shuf_idx_odd, > > + v_data_1); > > + > > + /* Find if the shuffle index was odd, via mask and compare */ > > + uint16_t index_odd_mask = 0x1; > > + const __m512i v_index_mask_u16 = > > + _mm512_set1_epi16(index_odd_mask); > > + > > + /* EVEN lanes, find if u8 index was odd, result as u16 bitmask */ > > + __m512i v_idx_even_masked = _mm512_and_si512(v_shuf_idxs, > > + v_index_mask_u16); > > + __mmask32 evn_rotate_mask = > > _mm512_cmpeq_epi16_mask(v_idx_even_masked, > > + > > + v_index_mask_u16); > > + > > + /* ODD lanes, find if u8 index was odd, result as u16 bitmask */ > > + __m512i v_shuf_idx_srli8 = _mm512_srli_epi16(v_shuf_idxs, 8); > > + __m512i v_idx_odd_masked = _mm512_and_si512(v_shuf_idx_srli8, > > + v_index_mask_u16); > > + __mmask32 odd_rotate_mask = > > _mm512_cmpeq_epi16_mask(v_idx_odd_masked, > > + v_index_mask_u16); > > + odd_rotate_mask = ~odd_rotate_mask; > > + > > + /* Rotate and blend results from each index */ > > + __m512i v_shuf_res_evn = _mm512_mask_srli_epi16(v_shuf1, > > evn_rotate_mask, > > + v_shuf1, 8); > > + __m512i v_shuf_res_odd = _mm512_mask_slli_epi16(v_shuf2, > > odd_rotate_mask, > > + v_shuf2, 8); > > + > > + /* If shuffle index was odd, blend shifted version */ > > + __m512i v_shuf_result = > _mm512_mask_blend_epi8(k_mask_odd_lanes, > > + v_shuf_res_evn, > > + v_shuf_res_odd); > > + > > + __m512i v_zeros = _mm512_setzero_si512(); > > + __m512i v_result_kmskd = _mm512_mask_blend_epi8(k_mask, v_zeros, > > + v_shuf_result); > > + > > + return v_result_kmskd; > > +} > > + > > +/* Wrapper function required to enable ISA. 
*/ static inline __m512i > > +__attribute__((__target__("avx512vbmi"))) > > +_mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i > idx, > > __m512i a) > > +{ > > + return _mm512_maskz_permutexvar_epi8(kmask, idx, a); } > > + > > + > > +/* This file contains optimized implementations of miniflow_extract() > > + * for specific common traffic patterns. The optimizations allow for > > + * quick probing of a specific packet type, and if a match with a > > +specific > > + * type is found, a shuffle like proceedure builds up the required > miniflow. > > + * > > + * The functionality here can be easily auto-validated and tested > > +against the > > + * scalar miniflow_extract() function. As such, manual review of the > > +code by > > + * the community (although welcome) is not required. Confidence in > > +the > > + * correctness of the code can be had from the autovalidation. > > + */ > > + > > +/* Generator for EtherType masks and values. */ #define > > +PATTERN_ETHERTYPE_GEN(type_b0, type_b1) \ > > + 0, 0, 0, 0, 0, 0, /* Ether MAC DST */ \ > > + 0, 0, 0, 0, 0, 0, /* Ether MAC SRC */ \ > > + type_b0, type_b1, /* EtherType */ > > + > > +#define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, > 0xFF) > > +#define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, > 0x00) > > + > > +/* Generator for checking IPv4 ver, ihl, and proto */ #define > > +PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \ > > + VER_IHL, /* Version and IHL */ \ > > + 0, 0, 0, /* DSCP, ECN, Total Lenght */ \ > Typo above, Length. > Fixed in v5. 
> > + 0, 0, /* Identification */ \ > > + /* Flags/Fragment offset: don't match MoreFrag (MF) or FragOffset */ \ > > + FLAG_OFF_B0, FLAG_OFF_B1, \ > > + 0, /* TTL */ \ > > + PROTO, /* Protocol */ \ > > + 0, 0, /* Header checksum */ \ > > + 0, 0, 0, 0, /* Src IP */ \ > > + 0, 0, 0, 0, /* Dst IP */ > > + > > +#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF) > > +#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11) #define > > +PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06) > > + > > +#define NU 0 > > +#define PATTERN_IPV4_UDP_SHUFFLE \ > > + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* > > +Ether */ \ > > + 26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* > > +IPv4 */ \ > > + 34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* > > +UDP > > */ \ > > + NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* > > Unused. */ > > + > > + > > +/* Generation of K-mask bitmask values, to zero out data in result. > > +Note that > > + * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used > > +must be > > + * set in this K-mask, and "NU" values must be zero in the k-mask. > > +Each mask > > + * defined here represents 2 blocks, so 16 bytes, so 4 characters (eg. > 0xFFFF). > > + * > > + * Note the ULL suffix allows shifting by 32 or more without integer > overflow. > > + */ > > +#define KMASK_ETHER 0x1FFFULL > > +#define KMASK_IPV4 0xF0FFULL > > +#define KMASK_UDP 0x000FULL > > + > > +#define PATTERN_IPV4_UDP_KMASK \ > > + (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32)) > > + > > + > > +/* This union allows initializing static data as u8, but easily > > +loading it > > + * into AVX512 registers too. The union ensures proper alignment for the > zmm. > > + */ > > +union mfex_data { > > + uint8_t u8_data[64]; > > + __m512i zmm; > > +}; > > + > > +/* This structure represents a single traffic pattern. 
The AVX512 > > +code to > > + * enable the specifics for each pattern is largely the same, so it > > +is > > + * specialized to use the common profile data from here. > > + * > > + * Due to the nature of e.g. TCP flag handling, or VLAN CFI bit > > +setting, > > + * some profiles require additional processing. This is handled by > > +having > > + * all implementations call a post-process function, and specializing > > +away > > + * the big switch() that handles all traffic types. > > + * > > + * This approach reduces AVX512 code-duplication for each traffic type. > > + */ > > +struct mfex_profile { > > + /* Required for probing a packet with the mfex pattern. */ > > + union mfex_data probe_mask; > > + union mfex_data probe_data; > > + > > + /* Required for reshaping packet into miniflow. */ > > + union mfex_data store_shuf; > > + __mmask64 store_kmsk; > > + > > + /* Constant data to set in mf.bits and dp_packet data on hit. */ > > + uint64_t mf_bits[2]; > > + uint16_t dp_pkt_offs[4]; > > + uint16_t dp_pkt_min_size; > > +}; > > + > > +enum MFEX_PROFILES { > > + PROFILE_ETH_IPV4_UDP, > > + PROFILE_COUNT, > > +}; > > + > > +/* Static const instances of profiles. These are compile-time > > +constants, > > + * and are specialized into individual miniflow-extract functions. > > + */ > > +static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = { > > + [PROFILE_ETH_IPV4_UDP] = { > > + .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK > > PATTERN_IPV4_MASK }, > > + .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 > > PATTERN_IPV4_UDP}, > > + > > + .store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE }, > > + .store_kmsk = PATTERN_IPV4_UDP_KMASK, > > + > > + .mf_bits = { 0x18a0000000000000, 0x0000000000040401}, > > + .dp_pkt_offs = { > > + 0, UINT16_MAX, 14, 34, > > + }, > > + .dp_pkt_min_size = 42, > > + }, > > +}; > > + > > + > > +/* Protocol specific helper functions, for calculating > > +offsets/lenghts. 
*/ static int32_t mfex_ipv4_set_l2_pad_size(struct > > +dp_packet *pkt, struct ip_header *nh, > > + uint32_t len_from_ipv4) { > > + /* Handle dynamic l2_pad_size. */ > > + uint16_t tot_len = ntohs(nh->ip_tot_len); > > + if (OVS_UNLIKELY(tot_len > len_from_ipv4 || > > + (len_from_ipv4 - tot_len) > UINT16_MAX)) { > > + return -1; > > + } > > + dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len); > > + return 0; > > +} > > + > > +/* Generic loop to process any mfex profile. This code is specialized > > +into > > + * multiple actual MFEX implementation functions. Its marked > > +ALWAYS_INLINE > > + * to ensure the compiler specializes each instance. The code is marked > "hot" > > + * to inform the compiler this is a hotspot in the program, > > +encouraging > > + * inlining of callee functions such as the permute calls. > > + */ > > +static inline uint32_t ALWAYS_INLINE > > +__attribute__ ((hot)) > > +mfex_avx512_process(struct dp_packet_batch *packets, > > + struct netdev_flow_key *keys, > > + uint32_t keys_size OVS_UNUSED, > > + odp_port_t in_port, > > + void *pmd_handle OVS_UNUSED, > > + const enum MFEX_PROFILES profile_id, > > + const uint32_t use_vbmi) { > > + uint32_t hitmask = 0; > > + struct dp_packet *packet; > > + > > + /* Here the profile to use is chosen by the variable used to specialize > > + * the function. This causes different MFEX traffic to be handled. > > + */ > > + const struct mfex_profile *profile = &mfex_profiles[profile_id]; > > + > > + /* Load profile constant data. 
*/ > > + __m512i v_vals = _mm512_loadu_si512(&profile->probe_data); > > + __m512i v_mask = _mm512_loadu_si512(&profile->probe_mask); > > + __m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf); > > + > > + __mmask64 k_shuf = profile->store_kmsk; > > + __m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits); > > + uint16_t dp_pkt_min_size = profile->dp_pkt_min_size; > > + > > + __m128i v_zeros = _mm_setzero_si128(); > > + __m128i v_blocks01 = _mm_insert_epi32(v_zeros, > > + odp_to_u32(in_port), 1); > > + > > + DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { > > + /* If the packet is smaller than the probe size, skip it. */ > > + const uint32_t size = dp_packet_size(packet); > > + if (size < dp_pkt_min_size) { > > + continue; > > + } > > + > > + /* Load packet data and probe with AVX512 mask & compare. */ > > + const uint8_t *pkt = dp_packet_data(packet); > > + __m512i v_pkt0 = _mm512_loadu_si512(pkt); > > + __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask); > > + __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, > > v_vals); > > + if (k_cmp != UINT64_MAX) { > > + continue; > > + } > > + > > + /* Copy known dp packet offsets to the dp_packet instance. */ > > + memcpy(&packet->l2_pad_size, &profile->dp_pkt_offs, > > + sizeof(uint16_t) * 4); > > + > > + /* Store known miniflow bits and first two blocks. */ > > + struct miniflow *mf = &keys[i].mf; > > + uint64_t *bits = (void *) &mf->map.bits[0]; > > + uint64_t *blocks = miniflow_values(mf); > > + _mm_storeu_si128((void *) bits, v_bits); > > + _mm_storeu_si128((void *) blocks, v_blocks01); > > + > > + /* Permute the packet layout into miniflow blocks shape. > > + * As different avx512 ISA levels have different > > + implementations, > > Minor, but would capitalize AVX512 above, seems to be standard the way it > is referred to in other comments. > Fixed in v5. > > + * this specializes on the "use_vbmi" attribute passed in. 
> > + */ > > + __m512i v512_zeros = _mm512_setzero_si512(); > > + __m512i v_blk0 = v512_zeros; > > + if (__builtin_constant_p(use_vbmi) && use_vbmi) { > > + v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf, > > + v_pkt0); > > + } else { > > + v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, > > + v_shuf, > > v512_zeros); > > + } > > + _mm512_storeu_si512(&blocks[2], v_blk0); > > + > > + > > + /* Perform "post-processing" per profile, handling details not > > easily > > + * handled in the above generic AVX512 code. Examples include TCP > flag > > + * parsing, adding the VLAN CFI bit, and handling IPv4 fragments. > > + */ > > + switch (profile_id) { > > + case PROFILE_COUNT: > > + ovs_assert(0); /* avoid compiler warning on missing ENUM */ > > + break; > > + > > + case PROFILE_ETH_IPV4_UDP: { > > + /* Handle dynamic l2_pad_size. */ > > + uint32_t size_from_ipv4 = size - sizeof(struct eth_header); > > + struct ip_header *nh = (void *)&pkt[sizeof(struct > > eth_header)]; > > + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) > > { > > + continue; > > + } > > + > > + } break; > > + default: > > + break; > > + }; > > + > > + /* This packet has its miniflow created, add to hitmask. 
*/ > > + hitmask |= 1 << i; > > + } > > + > > + return hitmask; > > +} > > + > > + > > +#define DECLARE_MFEX_FUNC(name, profile) \ > > +uint32_t \ > > +__attribute__((__target__("avx512f"))) \ > > +__attribute__((__target__("avx512vl"))) \ > > +__attribute__((__target__("avx512vbmi"))) \ > > +mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, \ > > + struct netdev_flow_key *keys, uint32_t keys_size, \ > > + odp_port_t in_port, void *pmd_handle) \ > > +{ \ > > + return mfex_avx512_process(packets, keys, keys_size, in_port, \ > > + pmd_handle, profile, 1); \ > > +} \ > > + \ > > +uint32_t \ > > +__attribute__((__target__("avx512f"))) \ > > +__attribute__((__target__("avx512vl"))) \ > > +mfex_avx512_##name(struct dp_packet_batch *packets, \ > > + struct netdev_flow_key *keys, uint32_t keys_size, \ > > + odp_port_t in_port, void *pmd_handle) \ > > +{ \ > > + return mfex_avx512_process(packets, keys, keys_size, in_port, \ > > + pmd_handle, profile, 0); \ > > +} > > + > > +/* Each profile gets a single declare here, which specializes the > > +function > > + * as required. > > + */ > > +DECLARE_MFEX_FUNC(ip_udp,PROFILE_ETH_IPV4_UDP) > > + > > + > > +static int32_t > > +avx512_isa_probe(uint32_t needs_vbmi) { > > + static const char *isa_required[] = { > > + "avx512f", > > + "avx512bw", > > + "bmi2", > > + }; > > + > > + int32_t ret = 0; > > + for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { > > + if (!dpdk_get_cpu_has_isa("x86_64", isa_required[i])) { > > + ret = -ENOTSUP; > > + } > > + } > > + > > + if (needs_vbmi) { > > + if (!dpdk_get_cpu_has_isa("x86_64", "avx512vbmi")) { > > + ret = -ENOTSUP; > > + } > > + } > > + > > + return ret; > > +} > > + > > +/* Probe functions to check ISA requirements. 
*/ int32_t > > +mfex_avx512_probe(void) > > +{ > > + const uint32_t needs_vbmi = 0; > > + return avx512_isa_probe(needs_vbmi); } > > + > > +int32_t > > +mfex_avx512_vbmi_probe(void) > > +{ > > + const uint32_t needs_vbmi = 1; > > + return avx512_isa_probe(needs_vbmi); } > > + > > +#endif /* __CHECKER__ */ > > +#endif /* __x86_64__ */ > > diff --git a/lib/dpif-netdev-private-extract.c > > b/lib/dpif-netdev-private-extract.c > > index 2008e5ee5..106a83867 100644 > > --- a/lib/dpif-netdev-private-extract.c > > +++ b/lib/dpif-netdev-private-extract.c > > @@ -47,8 +47,23 @@ static struct dpif_miniflow_extract_impl > mfex_impls[] = { > > .extract_func = mfex_study_traffic, > > .name = "study", > > }, > > + > > +/* Compile in implementations only if the compiler ISA checks pass. > > +*/ #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && > > __SSE4_2__) > > + { > > + .probe = mfex_avx512_vbmi_probe, > > + .extract_func = mfex_avx512_vbmi_ip_udp, > > + .name = "avx512_vbmi_ipv4_udp", > > + }, > > + { > > + .probe = mfex_avx512_probe, > > + .extract_func = mfex_avx512_ip_udp, > > + .name = "avx512_ipv4_udp", > > + }, > > +#endif > > }; > > > > + > > BUILD_ASSERT_DECL(MFEX_IMPLS_MAX_SIZE > ARRAY_SIZE(mfex_impls)); > > > > int32_t > > diff --git a/lib/dpif-netdev-private-extract.h > > b/lib/dpif-netdev-private-extract.h > > index 0ec74bef9..f32be202a 100644 > > --- a/lib/dpif-netdev-private-extract.h > > +++ b/lib/dpif-netdev-private-extract.h > > @@ -136,4 +136,23 @@ > > dpif_miniflow_extract_set_default(miniflow_extract_func func); > > uint32_t mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, > > struct dpif_miniflow_extract_impl *opt); > > > > +/* AVX512 MFEX Probe and Implementations functions. 
*/ #ifdef > > +__x86_64__ int32_t mfex_avx512_probe(void); int32_t > > +mfex_avx512_vbmi_probe(void); > > + > > +#define DECLARE_AVX512_MFEX_PROTOTYPE(name) > > \ > > + uint32_t > > \ > > + mfex_avx512_vbmi_##name(struct dp_packet_batch *packets, > \ > > + struct netdev_flow_key *keys, uint32_t keys_size, > > \ > > + odp_port_t in_port, void *pmd_handle); > > \ > > + uint32_t > > \ > > + mfex_avx512_##name(struct dp_packet_batch *packets, > > \ > > + struct netdev_flow_key *keys, uint32_t keys_size, > > \ > > + odp_port_t in_port, void *pmd_handle); > > + > > +DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp); > > +#endif /* __x86_64__ */ > > + > > + > > #endif /* DPIF_NETDEV_AVX512_EXTRACT */ > > -- > > 2.25.1 > > > > _______________________________________________ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev